1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s
4
5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
6
; Signed 32-bit remainder with both operands variable.
; The GISEL run (-amdgpu-codegenprepare-disable-idiv-expansion=1) expects
; v_rcp_iflag_f32, while the CGP run (=0) expects v_rcp_f32 plus an extra
; v_mul_lo_u32-by-0 / v_add_i32 pair around each v_mul_hi_u32.
7define i32 @v_srem_i32(i32 %num, i32 %den) {
8; GISEL-LABEL: v_srem_i32:
9; GISEL:       ; %bb.0:
10; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
12; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
13; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
14; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
15; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
16; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
17; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
18; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
19; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
20; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
21; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
22; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v3
23; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
24; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
25; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
26; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v1
27; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
28; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
29; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
30; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
31; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
32; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
33; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
34; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
35; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
36; GISEL-NEXT:    s_setpc_b64 s[30:31]
37;
38; CGP-LABEL: v_srem_i32:
39; CGP:       ; %bb.0:
40; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
42; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
43; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
44; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
45; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
46; CGP-NEXT:    v_xor_b32_e32 v1, v1, v3
47; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v1
48; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
49; CGP-NEXT:    v_rcp_f32_e32 v3, v3
50; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
51; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
52; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
53; CGP-NEXT:    v_mul_lo_u32 v5, 0, v4
54; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
55; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
56; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
57; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
58; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
59; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
60; CGP-NEXT:    v_mul_lo_u32 v3, v3, v1
61; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
62; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
63; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
64; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
65; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
66; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
67; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
68; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
69; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
70; CGP-NEXT:    s_setpc_b64 s[30:31]
71  %result = srem i32 %num, %den
72  ret i32 %result
73}
74
75; FIXME: The readfirstlane call is a workaround for the uniform-value-in-VGPR case not being handled.
76declare i32 @llvm.amdgcn.readfirstlane(i32)
77
; Scalar variant: an amdgpu_ps function whose operands are both marked inreg.
; The srem result is passed through llvm.amdgcn.readfirstlane before it is
; returned, and both runs expect a final v_readfirstlane_b32 into s0.
; The runs differ in v_rcp_iflag_f32 (GISEL) versus v_rcp_f32 with extra
; v_mul_lo_u32-by-0 / v_add_i32 pairs (CGP).
78define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
79; GISEL-LABEL: s_srem_i32:
80; GISEL:       ; %bb.0:
81; GISEL-NEXT:    s_ashr_i32 s2, s0, 31
82; GISEL-NEXT:    s_ashr_i32 s3, s1, 31
83; GISEL-NEXT:    s_add_i32 s0, s0, s2
84; GISEL-NEXT:    s_add_i32 s1, s1, s3
85; GISEL-NEXT:    s_xor_b32 s0, s0, s2
86; GISEL-NEXT:    s_xor_b32 s1, s1, s3
87; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s1
88; GISEL-NEXT:    s_sub_i32 s3, 0, s1
89; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
90; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
91; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
92; GISEL-NEXT:    v_mul_lo_u32 v1, s3, v0
93; GISEL-NEXT:    v_mul_hi_u32 v1, v0, v1
94; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
95; GISEL-NEXT:    v_mul_hi_u32 v0, s0, v0
96; GISEL-NEXT:    v_mul_lo_u32 v0, v0, s1
97; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
98; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
99; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
100; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
101; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
102; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
103; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
104; GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
105; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s2, v0
106; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
107; GISEL-NEXT:    ; return to shader part epilog
108;
109; CGP-LABEL: s_srem_i32:
110; CGP:       ; %bb.0:
111; CGP-NEXT:    s_ashr_i32 s2, s0, 31
112; CGP-NEXT:    s_ashr_i32 s3, s1, 31
113; CGP-NEXT:    s_add_i32 s0, s0, s2
114; CGP-NEXT:    s_add_i32 s1, s1, s3
115; CGP-NEXT:    s_xor_b32 s0, s0, s2
116; CGP-NEXT:    s_xor_b32 s1, s1, s3
117; CGP-NEXT:    v_cvt_f32_u32_e32 v0, s1
118; CGP-NEXT:    s_sub_i32 s3, 0, s1
119; CGP-NEXT:    v_rcp_f32_e32 v0, v0
120; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
121; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
122; CGP-NEXT:    v_mul_lo_u32 v1, s3, v0
123; CGP-NEXT:    v_mul_lo_u32 v2, 0, v1
124; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
125; CGP-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
126; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
127; CGP-NEXT:    v_mul_lo_u32 v1, 0, v0
128; CGP-NEXT:    v_mul_hi_u32 v0, s0, v0
129; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
130; CGP-NEXT:    v_mul_lo_u32 v0, v0, s1
131; CGP-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
132; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
133; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
134; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
135; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
136; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
137; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
138; CGP-NEXT:    v_xor_b32_e32 v0, s2, v0
139; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s2, v0
140; CGP-NEXT:    v_readfirstlane_b32 s0, v0
141; CGP-NEXT:    ; return to shader part epilog
142  %result = srem i32 %num, %den
143  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
144  ret i32 %readlane
145}
146
; <2 x i32> signed remainder with both operands variable.
; The expected output shows the expansion once per element, with the two
; element sequences interleaved. The GISEL run expects v_rcp_iflag_f32; the CGP
; run expects v_rcp_f32 plus extra v_mul_lo_u32-by-0 / v_add_i32 pairs.
147define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
148; GISEL-LABEL: v_srem_v2i32:
149; GISEL:       ; %bb.0:
150; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
152; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
153; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
154; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
155; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
156; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
157; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
158; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
159; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
160; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
161; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
162; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
163; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
164; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
165; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
166; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
167; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
168; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
169; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
170; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
171; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
172; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
173; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v5
174; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
175; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
176; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
177; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
178; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
179; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
180; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
181; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
182; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
183; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
184; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
185; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
186; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
187; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
188; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
189; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
190; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
191; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
192; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
193; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
194; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
195; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
196; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
197; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
198; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
199; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
200; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
201; GISEL-NEXT:    s_setpc_b64 s[30:31]
202;
203; CGP-LABEL: v_srem_v2i32:
204; CGP:       ; %bb.0:
205; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
207; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
208; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
209; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
210; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
211; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
212; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
213; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
214; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
215; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
216; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
217; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
218; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
219; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
220; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v3
221; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
222; CGP-NEXT:    v_rcp_f32_e32 v5, v5
223; CGP-NEXT:    v_rcp_f32_e32 v8, v8
224; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
225; CGP-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
226; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
227; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
228; CGP-NEXT:    v_mul_lo_u32 v7, v7, v5
229; CGP-NEXT:    v_mul_lo_u32 v9, v9, v8
230; CGP-NEXT:    v_mul_lo_u32 v10, 0, v7
231; CGP-NEXT:    v_mul_hi_u32 v7, v5, v7
232; CGP-NEXT:    v_mul_lo_u32 v11, 0, v9
233; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
234; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
235; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
236; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
237; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
238; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
239; CGP-NEXT:    v_mul_hi_u32 v5, v0, v5
240; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
241; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
242; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
243; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
244; CGP-NEXT:    v_mul_lo_u32 v5, v5, v2
245; CGP-NEXT:    v_mul_lo_u32 v7, v7, v3
246; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
247; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
248; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
249; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
250; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
251; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
252; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
253; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
254; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
255; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
256; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
257; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
258; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
259; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
260; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
261; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
262; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
263; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
264; CGP-NEXT:    s_setpc_b64 s[30:31]
265  %result = srem <2 x i32> %num, %den
266  ret <2 x i32> %result
267}
268
; Signed remainder by the constant 4096 (0x1000).
; Both runs share the common CHECK prefix, so the expected output is the same
; for each. The reciprocal-based sequence is still expected: v_rcp_iflag_f32
; takes 0x45800000 (the float encoding of 4096.0) and 0xfffff000 is -4096.
; The multiply by the denominator appears as v_lshlrev_b32 by 12.
269define i32 @v_srem_i32_pow2k_denom(i32 %num) {
270; CHECK-LABEL: v_srem_i32_pow2k_denom:
271; CHECK:       ; %bb.0:
272; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273; CHECK-NEXT:    s_movk_i32 s4, 0x1000
274; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
275; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, 0x45800000
276; CHECK-NEXT:    v_mov_b32_e32 v3, 0xfffff000
277; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
278; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
279; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
280; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
281; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
282; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
283; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
284; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
285; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 12, v2
286; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
287; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
288; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
289; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
290; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
291; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
292; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
293; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
294; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
295; CHECK-NEXT:    s_setpc_b64 s[30:31]
296  %result = srem i32 %num, 4096
297  ret i32 %result
298}
299
; <2 x i32> signed remainder by a splat of 4096 (0x1000).
; The two runs expect different code. The GISEL checks materialize the
; denominator in s4 via s_add_i32, compute one v_rcp_iflag_f32 that feeds both
; elements, and multiply back with v_mul_lo_u32 by s4. The CGP checks expect a
; v_rcp_iflag_f32 of the constant 0x45800000 per element and v_lshlrev_b32 by 12
; in place of the multiply.
300define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
301; GISEL-LABEL: v_srem_v2i32_pow2k_denom:
302; GISEL:       ; %bb.0:
303; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
305; GISEL-NEXT:    s_add_i32 s4, 0x1000, 0
306; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
307; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
308; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s4
309; GISEL-NEXT:    s_sub_i32 s5, 0, s4
310; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
311; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
312; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
313; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
314; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v4
315; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
316; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
317; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
318; GISEL-NEXT:    v_mul_lo_u32 v6, s5, v5
319; GISEL-NEXT:    v_mul_lo_u32 v7, s5, v4
320; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v6
321; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
322; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
323; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
324; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
325; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
326; GISEL-NEXT:    v_mul_lo_u32 v5, v5, s4
327; GISEL-NEXT:    v_mul_lo_u32 v4, v4, s4
328; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
329; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
330; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
331; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
332; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
333; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
334; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
335; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
336; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
337; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
338; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
339; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
340; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
341; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
342; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
343; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
344; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
345; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
346; GISEL-NEXT:    s_setpc_b64 s[30:31]
347;
348; CGP-LABEL: v_srem_v2i32_pow2k_denom:
349; CGP:       ; %bb.0:
350; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351; CGP-NEXT:    s_movk_i32 s4, 0x1000
352; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
353; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x45800000
354; CGP-NEXT:    s_movk_i32 s5, 0xf000
355; CGP-NEXT:    v_mov_b32_e32 v4, 0xfffff000
356; CGP-NEXT:    v_mov_b32_e32 v5, 0x1000
357; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
358; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, 0x45800000
359; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
360; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
361; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
362; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
363; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
364; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
365; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
366; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
367; CGP-NEXT:    v_mul_lo_u32 v8, s5, v3
368; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
369; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
370; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
371; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
372; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
373; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
374; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
375; CGP-NEXT:    v_lshlrev_b32_e32 v3, 12, v3
376; CGP-NEXT:    v_lshlrev_b32_e32 v4, 12, v4
377; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
378; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
379; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
380; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
381; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
382; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
383; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
384; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
385; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
386; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
387; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
388; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
389; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
390; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
391; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
392; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
393; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
394; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
395; CGP-NEXT:    s_setpc_b64 s[30:31]
396  %result = srem <2 x i32> %num, <i32 4096, i32 4096>
397  ret <2 x i32> %result
398}
399
; Signed remainder by the odd constant 1235195 (0x12d8fb).
; Both runs share the common CHECK prefix, so the expected output is the same
; for each. In the checks, 0xffed2705 is the 32-bit two's-complement negation
; of the denominator and 0x4996c7d8 is its single-precision float encoding.
400define i32 @v_srem_i32_oddk_denom(i32 %num) {
401; CHECK-LABEL: v_srem_i32_oddk_denom:
402; CHECK:       ; %bb.0:
403; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404; CHECK-NEXT:    s_mov_b32 s4, 0x12d8fb
405; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
406; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, 0x4996c7d8
407; CHECK-NEXT:    v_mov_b32_e32 v3, 0xffed2705
408; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
409; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
410; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
411; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
412; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
413; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
414; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
415; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
416; CHECK-NEXT:    v_mul_lo_u32 v2, v2, s4
417; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
418; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
419; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
420; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
421; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
422; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
423; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
424; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
425; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
426; CHECK-NEXT:    s_setpc_b64 s[30:31]
427  %result = srem i32 %num, 1235195
428  ret i32 %result
429}
430
; <2 x i32> signed remainder by a splat of 1235195 (0x12d8fb).
; The two runs expect different code. The GISEL checks materialize the
; denominator in s4 via s_add_i32 and compute one v_rcp_iflag_f32 that feeds
; both elements. The CGP checks expect a v_rcp_iflag_f32 of the constant
; 0x4996c7d8 per element, with the negated denominator 0xffed2705 as an
; immediate.
431define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
432; GISEL-LABEL: v_srem_v2i32_oddk_denom:
433; GISEL:       ; %bb.0:
434; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
435; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
436; GISEL-NEXT:    s_add_i32 s4, 0x12d8fb, 0
437; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
438; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
439; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s4
440; GISEL-NEXT:    s_sub_i32 s5, 0, s4
441; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
442; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
443; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
444; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
445; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v4
446; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
447; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
448; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
449; GISEL-NEXT:    v_mul_lo_u32 v6, s5, v5
450; GISEL-NEXT:    v_mul_lo_u32 v7, s5, v4
451; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v6
452; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
453; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
454; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
455; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
456; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
457; GISEL-NEXT:    v_mul_lo_u32 v5, v5, s4
458; GISEL-NEXT:    v_mul_lo_u32 v4, v4, s4
459; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
460; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
461; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
462; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
463; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
464; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
465; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
466; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
467; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
468; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
469; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
470; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
471; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
472; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
473; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
474; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
475; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
476; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
477; GISEL-NEXT:    s_setpc_b64 s[30:31]
478;
479; CGP-LABEL: v_srem_v2i32_oddk_denom:
480; CGP:       ; %bb.0:
481; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
482; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
483; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
484; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x4996c7d8
485; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
486; CGP-NEXT:    v_mov_b32_e32 v4, 0xffed2705
487; CGP-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
488; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
489; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, 0x4996c7d8
490; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
491; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
492; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
493; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
494; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
495; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
496; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
497; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
498; CGP-NEXT:    v_mul_lo_u32 v8, s5, v3
499; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
500; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
501; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
502; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
503; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
504; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
505; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
506; CGP-NEXT:    v_mul_lo_u32 v3, v3, s4
507; CGP-NEXT:    v_mul_lo_u32 v4, v4, s4
508; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
509; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
510; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
511; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
512; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
513; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
514; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
515; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
516; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
517; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
518; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
519; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
520; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
521; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
522; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
523; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
524; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
525; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
526; CGP-NEXT:    s_setpc_b64 s[30:31]
527  %result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
528  ret <2 x i32> %result
529}
530
; Signed remainder where the denominator is computed as shl i32 4096, %y.
; Both runs share the common CHECK prefix, so the expected output is the same
; for each: a v_lshl_b32 of 0x1000 followed by the full variable-denominator
; sequence (sign handling, v_rcp_iflag_f32, two conditional subtracts).
531define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) {
532; CHECK-LABEL: v_srem_i32_pow2_shl_denom:
533; CHECK:       ; %bb.0:
534; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
536; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
537; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
538; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
539; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
540; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
541; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
542; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v1
543; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
544; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
545; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
546; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
547; CHECK-NEXT:    v_mul_lo_u32 v4, v4, v3
548; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
549; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
550; CHECK-NEXT:    v_mul_hi_u32 v3, v0, v3
551; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v1
552; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
553; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
554; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
555; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
556; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
557; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
558; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
559; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
560; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
561; CHECK-NEXT:    s_setpc_b64 s[30:31]
562  %shl.y = shl i32 4096, %y
563  %r = srem i32 %x, %shl.y
564  ret i32 %r
565}
566
; <2 x i32> signed remainder where the denominator is a splat of 4096 shifted
; left by %y element-wise.
; Both runs expect the shift as v_lshl_b32 of s4 (0x1000) per element, followed
; by the variable-denominator sequence. The GISEL run expects v_rcp_iflag_f32;
; the CGP run expects v_rcp_f32 plus extra v_mul_lo_u32-by-0 / v_add_i32 pairs.
567define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
568; GISEL-LABEL: v_srem_v2i32_pow2_shl_denom:
569; GISEL:       ; %bb.0:
570; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571; GISEL-NEXT:    s_movk_i32 s4, 0x1000
572; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
573; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
574; GISEL-NEXT:    v_lshl_b32_e32 v2, s4, v2
575; GISEL-NEXT:    v_lshl_b32_e32 v3, s4, v3
576; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
577; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
578; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
579; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
580; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
581; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
582; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
583; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
584; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
585; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
586; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
587; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
588; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
589; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
590; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
591; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
592; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
593; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
594; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
595; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
596; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
597; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
598; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
599; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
600; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
601; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
602; GISEL-NEXT:    v_mul_hi_u32 v6, v0, v6
603; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
604; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v2
605; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
606; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
607; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
608; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
609; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
610; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
611; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
612; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
613; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
614; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
615; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
616; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
617; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
618; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
619; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
620; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
621; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
622; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
623; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
624; GISEL-NEXT:    s_setpc_b64 s[30:31]
625;
626; CGP-LABEL: v_srem_v2i32_pow2_shl_denom:
627; CGP:       ; %bb.0:
628; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
629; CGP-NEXT:    s_movk_i32 s4, 0x1000
630; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
631; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
632; CGP-NEXT:    v_lshl_b32_e32 v2, s4, v2
633; CGP-NEXT:    v_lshl_b32_e32 v3, s4, v3
634; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
635; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
636; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
637; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
638; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
639; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
640; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
641; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
642; CGP-NEXT:    v_xor_b32_e32 v2, v2, v6
643; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
644; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v2
645; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
646; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v3
647; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
648; CGP-NEXT:    v_rcp_f32_e32 v6, v6
649; CGP-NEXT:    v_rcp_f32_e32 v8, v8
650; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
651; CGP-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
652; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
653; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
654; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
655; CGP-NEXT:    v_mul_lo_u32 v9, v9, v8
656; CGP-NEXT:    v_mul_lo_u32 v10, 0, v7
657; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
658; CGP-NEXT:    v_mul_lo_u32 v11, 0, v9
659; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
660; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
661; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
662; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
663; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
664; CGP-NEXT:    v_mul_lo_u32 v8, 0, v6
665; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
666; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
667; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
668; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
669; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
670; CGP-NEXT:    v_mul_lo_u32 v6, v6, v2
671; CGP-NEXT:    v_mul_lo_u32 v7, v7, v3
672; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
673; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
674; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
675; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
676; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
677; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
678; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
679; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
680; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
681; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
682; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
683; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
684; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
685; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
686; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
687; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
688; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
689; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
690; CGP-NEXT:    s_setpc_b64 s[30:31]
691  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
692  %r = srem <2 x i32> %x, %shl.y
693  ret <2 x i32> %r
694}
695
; Scalar srem test where both operands are first masked with 16777215
; (0xffffff), so only the low 24 bits of %num and %den reach the srem.
; The GISEL assertions belong to the RUN line that passes
; -amdgpu-codegenprepare-disable-idiv-expansion=1, the CGP assertions to the
; RUN line that passes =0.
; In the assertions below, the GISEL output still wraps the unsigned expansion
; in sign handling (v_ashrrev_i32 by 31, add/xor before, xor/sub after) and
; uses v_rcp_iflag_f32, while the CGP output has no sign handling, uses
; v_rcp_f32, and contains v_mul_lo_u32 instructions with a constant 0 operand.
; NOTE(review): presumably the CGP path drops the sign handling because the
; mask leaves both sign bits zero - confirm against AMDGPUCodeGenPrepare.
696define i32 @v_srem_i32_24bit(i32 %num, i32 %den) {
697; GISEL-LABEL: v_srem_i32_24bit:
698; GISEL:       ; %bb.0:
699; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
701; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
702; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
703; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
704; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
705; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
706; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
707; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
708; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
709; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
710; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
711; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
712; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
713; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
714; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v3
715; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
716; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
717; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
718; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v1
719; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
720; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
721; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
722; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
723; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
724; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
725; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
726; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
727; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
728; GISEL-NEXT:    s_setpc_b64 s[30:31]
729;
730; CGP-LABEL: v_srem_i32_24bit:
731; CGP:       ; %bb.0:
732; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
733; CGP-NEXT:    s_mov_b32 s4, 0xffffff
734; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
735; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
736; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
737; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
738; CGP-NEXT:    v_rcp_f32_e32 v2, v2
739; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
740; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
741; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
742; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
743; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
744; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
745; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
746; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
747; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
748; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
749; CGP-NEXT:    v_mul_lo_u32 v2, v2, v1
750; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
751; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
752; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
753; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
754; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
755; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
756; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
757; CGP-NEXT:    s_setpc_b64 s[30:31]
  ; 16777215 == 2^24 - 1: keep the low 24 bits of each operand, clear the rest.
758  %num.mask = and i32 %num, 16777215
759  %den.mask = and i32 %den, 16777215
  ; The signed remainder of the two masked values is the operation under test.
760  %result = srem i32 %num.mask, %den.mask
761  ret i32 %result
762}
763
; <2 x i32> srem test where every lane of both operands is first masked with
; 16777215 (0xffffff), so only the low 24 bits of each element reach the srem.
; The GISEL assertions belong to the RUN line that passes
; -amdgpu-codegenprepare-disable-idiv-expansion=1, the CGP assertions to the
; RUN line that passes =0.
; In the assertions below, both outputs handle the two lanes with interleaved
; copies of the same instruction sequence. The GISEL output wraps it in sign
; handling (v_ashrrev_i32 by 31, add/xor before, xor/sub after) and uses
; v_rcp_iflag_f32, while the CGP output has no sign handling, uses v_rcp_f32,
; and contains v_mul_lo_u32 instructions with a constant 0 operand.
; NOTE(review): presumably the CGP path drops the sign handling because the
; mask leaves every lane's sign bit zero - confirm against AMDGPUCodeGenPrepare.
764define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
765; GISEL-LABEL: v_srem_v2i32_24bit:
766; GISEL:       ; %bb.0:
767; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
768; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
769; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
770; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
771; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
772; GISEL-NEXT:    v_and_b32_e32 v3, s4, v3
773; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
774; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
775; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
776; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
777; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
778; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
779; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
780; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
781; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
782; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
783; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
784; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
785; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
786; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
787; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
788; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
789; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
790; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
791; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
792; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
793; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
794; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
795; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v5
796; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
797; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
798; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
799; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
800; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
801; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
802; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
803; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
804; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
805; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
806; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
807; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
808; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
809; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
810; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
811; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
812; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
813; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
814; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
815; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
816; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
817; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
818; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
819; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
820; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
821; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
822; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
823; GISEL-NEXT:    s_setpc_b64 s[30:31]
824;
825; CGP-LABEL: v_srem_v2i32_24bit:
826; CGP:       ; %bb.0:
827; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
828; CGP-NEXT:    s_mov_b32 s4, 0xffffff
829; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
830; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
831; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
832; CGP-NEXT:    v_and_b32_e32 v3, s4, v3
833; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
834; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
835; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
836; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
837; CGP-NEXT:    v_rcp_f32_e32 v4, v4
838; CGP-NEXT:    v_rcp_f32_e32 v6, v6
839; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
840; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
841; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
842; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
843; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
844; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
845; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
846; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
847; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
848; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
849; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
850; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
851; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
852; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
853; CGP-NEXT:    v_mul_lo_u32 v6, 0, v4
854; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
855; CGP-NEXT:    v_mul_lo_u32 v7, 0, v5
856; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
857; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
858; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
859; CGP-NEXT:    v_mul_lo_u32 v4, v4, v2
860; CGP-NEXT:    v_mul_lo_u32 v5, v5, v3
861; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
862; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
863; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
864; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
865; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
866; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
867; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
868; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
869; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
870; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
871; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
872; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
873; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
874; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
875; CGP-NEXT:    s_setpc_b64 s[30:31]
  ; 16777215 == 2^24 - 1: keep the low 24 bits of every lane, clear the rest.
876  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
877  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
  ; The lane-wise signed remainder of the masked vectors is the operation under test.
878  %result = srem <2 x i32> %num.mask, %den.mask
879  ret <2 x i32> %result
880}
881