1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s
4
5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
6
; Scalar i32 urem with both operands variable; the checks show them in v0/v1.
; The two llc invocations at the top of the file produce different code here,
; so the GISEL and CGP prefixes are checked separately: the CGP output uses
; v_rcp_f32 and carries extra multiplies by 0, while the GISEL output uses
; v_rcp_iflag_f32.
7define i32 @v_urem_i32(i32 %num, i32 %den) {
8; GISEL-LABEL: v_urem_i32:
9; GISEL:       ; %bb.0:
10; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
12; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
13; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
14; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
15; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
16; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
17; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
18; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
19; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
20; GISEL-NEXT:    v_mul_lo_u32 v2, v2, v1
21; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
22; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
23; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
24; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
25; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
26; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
27; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
28; GISEL-NEXT:    s_setpc_b64 s[30:31]
29;
30; CGP-LABEL: v_urem_i32:
31; CGP:       ; %bb.0:
32; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
34; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
35; CGP-NEXT:    v_rcp_f32_e32 v2, v2
36; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
37; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
38; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
39; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
40; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
41; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
42; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
43; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
44; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
45; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
46; CGP-NEXT:    v_mul_lo_u32 v2, v2, v1
47; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
48; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
49; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
50; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
51; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
52; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
53; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
54; CGP-NEXT:    s_setpc_b64 s[30:31]
55  %result = urem i32 %num, %den
56  ret i32 %result
57}
58
59; FIXME: This is a workaround for not handling the uniform VGPR case.
; s_urem_i32 routes its urem result through this intrinsic before returning it.
60declare i32 @llvm.amdgcn.readfirstlane(i32)
61
; Scalar i32 urem in an amdgpu_ps function with both operands marked inreg;
; the checks show them being read from s0/s1. The urem result is passed through
; llvm.amdgcn.readfirstlane before being returned, which appears as the final
; v_readfirstlane_b32 in both expected sequences.
62define amdgpu_ps i32 @s_urem_i32(i32 inreg %num, i32 inreg %den) {
63; GISEL-LABEL: s_urem_i32:
64; GISEL:       ; %bb.0:
65; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s1
66; GISEL-NEXT:    s_sub_i32 s2, 0, s1
67; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
68; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
69; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
70; GISEL-NEXT:    v_mul_lo_u32 v1, s2, v0
71; GISEL-NEXT:    v_mul_hi_u32 v1, v0, v1
72; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
73; GISEL-NEXT:    v_mul_hi_u32 v0, s0, v0
74; GISEL-NEXT:    v_mul_lo_u32 v0, v0, s1
75; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
76; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
77; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
78; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
79; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
80; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
81; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
82; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
83; GISEL-NEXT:    ; return to shader part epilog
84;
85; CGP-LABEL: s_urem_i32:
86; CGP:       ; %bb.0:
87; CGP-NEXT:    v_cvt_f32_u32_e32 v0, s1
88; CGP-NEXT:    s_sub_i32 s2, 0, s1
89; CGP-NEXT:    v_rcp_f32_e32 v0, v0
90; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
91; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
92; CGP-NEXT:    v_mul_lo_u32 v1, s2, v0
93; CGP-NEXT:    v_mul_lo_u32 v2, 0, v1
94; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
95; CGP-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
96; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
97; CGP-NEXT:    v_mul_lo_u32 v1, 0, v0
98; CGP-NEXT:    v_mul_hi_u32 v0, s0, v0
99; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
100; CGP-NEXT:    v_mul_lo_u32 v0, v0, s1
101; CGP-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
102; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
103; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
104; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
105; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
106; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
107; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
108; CGP-NEXT:    v_readfirstlane_b32 s0, v0
109; CGP-NEXT:    ; return to shader part epilog
110  %result = urem i32 %num, %den
111  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
112  ret i32 %readlane
113}
114
; <2 x i32> urem with both operands variable. Both expected sequences perform
; the expansion once per element, with the two instruction streams interleaved.
; As in the scalar case, the CGP output uses v_rcp_f32 and carries extra
; multiplies by 0, while the GISEL output uses v_rcp_iflag_f32.
115define <2 x i32> @v_urem_v2i32(<2 x i32> %num, <2 x i32> %den) {
116; GISEL-LABEL: v_urem_v2i32:
117; GISEL:       ; %bb.0:
118; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
120; GISEL-NEXT:    s_mov_b32 s4, 0x4f7ffffe
121; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
122; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
123; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
124; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
125; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
126; GISEL-NEXT:    v_mul_f32_e32 v4, s4, v4
127; GISEL-NEXT:    v_mul_f32_e32 v6, s4, v6
128; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
129; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
130; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
131; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
132; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
133; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
134; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
135; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
136; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
137; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
138; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v2
139; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v3
140; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
141; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
142; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
143; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
144; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
145; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
146; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
147; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
148; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
149; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
150; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
151; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
152; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
153; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
154; GISEL-NEXT:    s_setpc_b64 s[30:31]
155;
156; CGP-LABEL: v_urem_v2i32:
157; CGP:       ; %bb.0:
158; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
160; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
161; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
162; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
163; CGP-NEXT:    v_rcp_f32_e32 v4, v4
164; CGP-NEXT:    v_rcp_f32_e32 v6, v6
165; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
166; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
167; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
168; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
169; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
170; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
171; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
172; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
173; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
174; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
175; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
176; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
177; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
178; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
179; CGP-NEXT:    v_mul_lo_u32 v6, 0, v4
180; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
181; CGP-NEXT:    v_mul_lo_u32 v7, 0, v5
182; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
183; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
184; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
185; CGP-NEXT:    v_mul_lo_u32 v4, v4, v2
186; CGP-NEXT:    v_mul_lo_u32 v5, v5, v3
187; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
188; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
189; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
190; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
191; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
192; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
193; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
194; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
195; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
196; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
197; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
198; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
199; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
200; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
201; CGP-NEXT:    s_setpc_b64 s[30:31]
202  %result = urem <2 x i32> %num, %den
203  ret <2 x i32> %result
204}
205
; i32 urem by the constant 4096 (0x1000 in the checks). Only the shared CHECK
; prefix is used, i.e. both llc invocations are expected to emit the same code.
; NOTE(review): the expected output still goes through the reciprocal-based
; sequence (only the multiply by the divisor becomes a shift left by 12) rather
; than masking with 4095 - presumably a missed optimization; confirm.
206define i32 @v_urem_i32_pow2k_denom(i32 %num) {
207; CHECK-LABEL: v_urem_i32_pow2k_denom:
208; CHECK:       ; %bb.0:
209; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210; CHECK-NEXT:    s_movk_i32 s4, 0x1000
211; CHECK-NEXT:    v_mov_b32_e32 v1, 0xfffff000
212; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s4
213; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
214; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
215; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
216; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
217; CHECK-NEXT:    v_mul_hi_u32 v1, v2, v1
218; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
219; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
220; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 12, v1
221; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
222; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s4, v0
223; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
224; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
225; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s4, v0
226; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
227; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
228; CHECK-NEXT:    s_setpc_b64 s[30:31]
229  %result = urem i32 %num, 4096
230  ret i32 %result
231}
232
; <2 x i32> urem by a splat of the constant 4096. The two llc invocations
; differ here: the GISEL output computes the reciprocal once and reuses it for
; both elements, while the CGP output computes it separately for each element
; (once from s4 and once from a copy of the constant in v2).
233define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) {
234; GISEL-LABEL: v_urem_v2i32_pow2k_denom:
235; GISEL:       ; %bb.0:
236; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237; GISEL-NEXT:    s_movk_i32 s4, 0x1000
238; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, s4
239; GISEL-NEXT:    s_sub_i32 s5, 0, s4
240; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
241; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
242; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
243; GISEL-NEXT:    v_mul_lo_u32 v3, s5, v2
244; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
245; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
246; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v2
247; GISEL-NEXT:    v_mul_hi_u32 v2, v1, v2
248; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 12, v3
249; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 12, v2
250; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
251; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
252; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
253; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v1
254; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
255; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
256; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
257; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
258; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
259; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v1
260; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
261; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
262; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
263; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
264; GISEL-NEXT:    s_setpc_b64 s[30:31]
265;
266; CGP-LABEL: v_urem_v2i32_pow2k_denom:
267; CGP:       ; %bb.0:
268; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
269; CGP-NEXT:    s_movk_i32 s4, 0x1000
270; CGP-NEXT:    v_mov_b32_e32 v2, 0x1000
271; CGP-NEXT:    s_mov_b32 s5, 0x4f7ffffe
272; CGP-NEXT:    s_movk_i32 s6, 0xf000
273; CGP-NEXT:    v_cvt_f32_u32_e32 v3, s4
274; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
275; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
276; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
277; CGP-NEXT:    v_mul_f32_e32 v3, s5, v3
278; CGP-NEXT:    v_mul_f32_e32 v4, s5, v4
279; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
280; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
281; CGP-NEXT:    v_mul_lo_u32 v5, s6, v3
282; CGP-NEXT:    v_mul_lo_u32 v6, s6, v4
283; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
284; CGP-NEXT:    v_mul_hi_u32 v6, v4, v6
285; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
286; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
287; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
288; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
289; CGP-NEXT:    v_lshlrev_b32_e32 v3, 12, v3
290; CGP-NEXT:    v_lshlrev_b32_e32 v4, 12, v4
291; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
292; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
293; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
294; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v2
295; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
296; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
297; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
298; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
299; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
300; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v2
301; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
302; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
303; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
304; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
305; CGP-NEXT:    s_setpc_b64 s[30:31]
306  %result = urem <2 x i32> %num, <i32 4096, i32 4096>
307  ret <2 x i32> %result
308}
309
; i32 urem by the non-power-of-two constant 1235195 (0x12d8fb in the checks).
; Only the shared CHECK prefix is used, i.e. both llc invocations are expected
; to emit the same code.
310define i32 @v_urem_i32_oddk_denom(i32 %num) {
311; CHECK-LABEL: v_urem_i32_oddk_denom:
312; CHECK:       ; %bb.0:
313; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314; CHECK-NEXT:    s_mov_b32 s4, 0x12d8fb
315; CHECK-NEXT:    v_mov_b32_e32 v1, 0xffed2705
316; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s4
317; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
318; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
319; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
320; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
321; CHECK-NEXT:    v_mul_hi_u32 v1, v2, v1
322; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
323; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
324; CHECK-NEXT:    v_mul_lo_u32 v1, v1, s4
325; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
326; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s4, v0
327; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
328; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
329; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s4, v0
330; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
331; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
332; CHECK-NEXT:    s_setpc_b64 s[30:31]
333  %result = urem i32 %num, 1235195
334  ret i32 %result
335}
336
; <2 x i32> urem by a splat of the constant 1235195. In both expected outputs
; the reciprocal is computed once and shared by the two elements; the outputs
; differ in how the constants are materialized (the CGP output also copies the
; divisor into v2 and loads its negation directly instead of using s_sub_i32).
337define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
338; GISEL-LABEL: v_urem_v2i32_oddk_denom:
339; GISEL:       ; %bb.0:
340; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
341; GISEL-NEXT:    s_mov_b32 s4, 0x12d8fb
342; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, s4
343; GISEL-NEXT:    s_sub_i32 s5, 0, s4
344; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
345; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
346; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
347; GISEL-NEXT:    v_mul_lo_u32 v3, s5, v2
348; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
349; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
350; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v2
351; GISEL-NEXT:    v_mul_hi_u32 v2, v1, v2
352; GISEL-NEXT:    v_mul_lo_u32 v3, v3, s4
353; GISEL-NEXT:    v_mul_lo_u32 v2, v2, s4
354; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
355; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
356; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
357; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v1
358; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
359; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
360; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
361; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
362; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
363; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v1
364; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
365; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
366; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
367; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
368; GISEL-NEXT:    s_setpc_b64 s[30:31]
369;
370; CGP-LABEL: v_urem_v2i32_oddk_denom:
371; CGP:       ; %bb.0:
372; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
373; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
374; CGP-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
375; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
376; CGP-NEXT:    v_cvt_f32_u32_e32 v3, s4
377; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
378; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
379; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
380; CGP-NEXT:    v_mul_lo_u32 v4, s5, v3
381; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
382; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
383; CGP-NEXT:    v_mul_hi_u32 v4, v0, v3
384; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
385; CGP-NEXT:    v_mul_lo_u32 v4, v4, s4
386; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
387; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
388; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
389; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
390; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v2
391; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
392; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
393; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
394; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
395; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
396; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v2
397; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
398; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
399; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
400; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
401; CGP-NEXT:    s_setpc_b64 s[30:31]
402  %result = urem <2 x i32> %num, <i32 1235195, i32 1235195>
403  ret <2 x i32> %result
404}
405
; i32 urem where the divisor is the constant 4096 shifted left by the variable
; %y. Only the shared CHECK prefix is used, i.e. both llc invocations are
; expected to emit the same code.
; NOTE(review): the expected output uses the generic reciprocal-based sequence
; even though the divisor is built from a power of two - presumably a missed
; optimization; confirm.
406define i32 @v_urem_i32_pow2_shl_denom(i32 %x, i32 %y) {
407; CHECK-LABEL: v_urem_i32_pow2_shl_denom:
408; CHECK:       ; %bb.0:
409; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
411; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v1
412; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
413; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
414; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
415; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
416; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
417; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
418; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
419; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
420; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v1
421; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
422; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
423; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
424; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
425; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
426; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
427; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
428; CHECK-NEXT:    s_setpc_b64 s[30:31]
429  %shl.y = shl i32 4096, %y
430  %r = urem i32 %x, %shl.y
431  ret i32 %r
432}
433
; <2 x i32> urem where each divisor element is 4096 shifted left by the
; corresponding element of %y. The two llc invocations differ here: the CGP
; output uses v_rcp_f32 and carries extra multiplies by 0, while the GISEL
; output uses v_rcp_iflag_f32.
434define <2 x i32> @v_urem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
435; GISEL-LABEL: v_urem_v2i32_pow2_shl_denom:
436; GISEL:       ; %bb.0:
437; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438; GISEL-NEXT:    s_movk_i32 s4, 0x1000
439; GISEL-NEXT:    s_mov_b32 s5, 0x4f7ffffe
440; GISEL-NEXT:    v_lshl_b32_e32 v2, s4, v2
441; GISEL-NEXT:    v_lshl_b32_e32 v3, s4, v3
442; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
443; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
444; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
445; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
446; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
447; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
448; GISEL-NEXT:    v_mul_f32_e32 v4, s5, v4
449; GISEL-NEXT:    v_mul_f32_e32 v6, s5, v6
450; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
451; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
452; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
453; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
454; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
455; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
456; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
457; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
458; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
459; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
460; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v2
461; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v3
462; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
463; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
464; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
465; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
466; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
467; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
468; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
469; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
470; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
471; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
472; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
473; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
474; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
475; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
476; GISEL-NEXT:    s_setpc_b64 s[30:31]
477;
478; CGP-LABEL: v_urem_v2i32_pow2_shl_denom:
479; CGP:       ; %bb.0:
480; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481; CGP-NEXT:    s_movk_i32 s4, 0x1000
482; CGP-NEXT:    v_lshl_b32_e32 v2, s4, v2
483; CGP-NEXT:    v_lshl_b32_e32 v3, s4, v3
484; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
485; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
486; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
487; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
488; CGP-NEXT:    v_rcp_f32_e32 v4, v4
489; CGP-NEXT:    v_rcp_f32_e32 v6, v6
490; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
491; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
492; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
493; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
494; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
495; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
496; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
497; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
498; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
499; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
500; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
501; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
502; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
503; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
504; CGP-NEXT:    v_mul_lo_u32 v6, 0, v4
505; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
506; CGP-NEXT:    v_mul_lo_u32 v7, 0, v5
507; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
508; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
509; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
510; CGP-NEXT:    v_mul_lo_u32 v4, v4, v2
511; CGP-NEXT:    v_mul_lo_u32 v5, v5, v3
512; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
513; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
514; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
515; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
516; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
517; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
518; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
519; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
520; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
521; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
522; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
523; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
524; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
525; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
526; CGP-NEXT:    s_setpc_b64 s[30:31]
527  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
528  %r = urem <2 x i32> %x, %shl.y
529  ret <2 x i32> %r
530}
531
; i32 urem with both operands first masked to their low 24 bits
; (16777215 == 0xffffff, the mask loaded into s4 in the checks).
; NOTE(review): both expected outputs still use the same 32-bit expansion as
; the unmasked case; presumably this test tracks whether a cheaper 24-bit path
; gets selected - confirm.
532define i32 @v_urem_i32_24bit(i32 %num, i32 %den) {
533; GISEL-LABEL: v_urem_i32_24bit:
534; GISEL:       ; %bb.0:
535; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
536; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
537; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
538; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
539; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
540; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
541; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
542; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
543; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
544; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
545; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
546; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
547; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
548; GISEL-NEXT:    v_mul_lo_u32 v2, v2, v1
549; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
550; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
551; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
552; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
553; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
554; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
555; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
556; GISEL-NEXT:    s_setpc_b64 s[30:31]
557;
558; CGP-LABEL: v_urem_i32_24bit:
559; CGP:       ; %bb.0:
560; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
561; CGP-NEXT:    s_mov_b32 s4, 0xffffff
562; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
563; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
564; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
565; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
566; CGP-NEXT:    v_rcp_f32_e32 v2, v2
567; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
568; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
569; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
570; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
571; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
572; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
573; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
574; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
575; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
576; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
577; CGP-NEXT:    v_mul_lo_u32 v2, v2, v1
578; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
579; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
580; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
581; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
582; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
583; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
584; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
585; CGP-NEXT:    s_setpc_b64 s[30:31]
586  %num.mask = and i32 %num, 16777215
587  %den.mask = and i32 %den, 16777215
588  %result = urem i32 %num.mask, %den.mask
589  ret i32 %result
590}
591
; <2 x i32> urem with every element of both operands first masked to its low
; 24 bits (16777215 == 0xffffff). Both expected outputs apply the mask to all
; four input registers and then perform the expansion once per element.
; NOTE(review): as in the scalar 24-bit test, the 32-bit expansion is still
; what gets checked; presumably this tracks a possible 24-bit path - confirm.
592define <2 x i32> @v_urem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
593; GISEL-LABEL: v_urem_v2i32_24bit:
594; GISEL:       ; %bb.0:
595; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
596; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
597; GISEL-NEXT:    s_mov_b32 s5, 0x4f7ffffe
598; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
599; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
600; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
601; GISEL-NEXT:    v_and_b32_e32 v3, s4, v3
602; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
603; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
604; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
605; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
606; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
607; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
608; GISEL-NEXT:    v_mul_f32_e32 v4, s5, v4
609; GISEL-NEXT:    v_mul_f32_e32 v6, s5, v6
610; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
611; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
612; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
613; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
614; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
615; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
616; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
617; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
618; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
619; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
620; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v2
621; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v3
622; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
623; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
624; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
625; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
626; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
627; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
628; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
629; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
630; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
631; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
632; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
633; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
634; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
635; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
636; GISEL-NEXT:    s_setpc_b64 s[30:31]
637;
638; CGP-LABEL: v_urem_v2i32_24bit:
639; CGP:       ; %bb.0:
640; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
641; CGP-NEXT:    s_mov_b32 s4, 0xffffff
642; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
643; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
644; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
645; CGP-NEXT:    v_and_b32_e32 v3, s4, v3
646; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
647; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
648; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
649; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
650; CGP-NEXT:    v_rcp_f32_e32 v4, v4
651; CGP-NEXT:    v_rcp_f32_e32 v6, v6
652; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
653; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
654; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
655; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
656; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
657; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
658; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
659; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
660; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
661; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
662; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
663; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
664; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
665; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
666; CGP-NEXT:    v_mul_lo_u32 v6, 0, v4
667; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
668; CGP-NEXT:    v_mul_lo_u32 v7, 0, v5
669; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
670; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
671; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
672; CGP-NEXT:    v_mul_lo_u32 v4, v4, v2
673; CGP-NEXT:    v_mul_lo_u32 v5, v5, v3
674; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
675; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
676; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
677; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
678; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
679; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
680; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
681; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
682; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
683; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
684; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
685; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
686; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
687; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
688; CGP-NEXT:    s_setpc_b64 s[30:31]
689  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
690  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
691  %result = urem <2 x i32> %num.mask, %den.mask
692  ret <2 x i32> %result
693}
694