1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
4
5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
6
; v_urem_i64: unsigned 64-bit remainder of the two function arguments.
; The assertions below use the common prefix shared by both RUN lines, so both
; configurations are expected to emit identical code for this function.
; Shape of the expected output:
;   %bb.0 - ORs v1 with v3 (presumably the high dwords of %num and %den) and
;           compares the result against zero to select a path per lane.
;   %bb.1 - taken by lanes where that OR is non-zero: full 64-bit expansion
;           (float reciprocal estimate built from v2/v3, integer multiply
;           refinement, then compare-and-subtract corrections).
;   %bb.3 - taken by the remaining lanes: 32-bit expansion that reads only v0
;           and v2 and writes 0 to the high dword of the result (v5).
; The assertion lines are autogenerated; regenerate them rather than editing by hand.
7define i64 @v_urem_i64(i64 %num, i64 %den) {
8; CHECK-LABEL: v_urem_i64:
9; CHECK:       ; %bb.0:
10; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; CHECK-NEXT:    v_or_b32_e32 v5, v1, v3
12; CHECK-NEXT:    v_mov_b32_e32 v4, 0
13; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
14; CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
15; CHECK-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
16; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
17; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
18; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
19; CHECK-NEXT:    s_cbranch_execz BB0_2
20; CHECK-NEXT:  ; %bb.1:
21; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
22; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
23; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
24; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v3, vcc
25; CHECK-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
26; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
27; CHECK-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
28; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
29; CHECK-NEXT:    v_trunc_f32_e32 v5, v5
30; CHECK-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
31; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
32; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
33; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v5
34; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v4
35; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v4
36; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v4
37; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
38; CHECK-NEXT:    v_mul_lo_u32 v10, v5, v9
39; CHECK-NEXT:    v_mul_hi_u32 v12, v4, v9
40; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v9
41; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
42; CHECK-NEXT:    v_mul_lo_u32 v11, v4, v8
43; CHECK-NEXT:    v_mul_lo_u32 v13, v5, v8
44; CHECK-NEXT:    v_mul_hi_u32 v14, v4, v8
45; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v8
46; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
47; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
48; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
49; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
50; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
51; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
52; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
53; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
54; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
55; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
56; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
57; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
58; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
59; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
60; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
61; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v5, v8, vcc
62; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v8
63; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v4
64; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v4
65; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v4
66; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v9
67; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
68; CHECK-NEXT:    v_mul_hi_u32 v12, v4, v8
69; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
70; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
71; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
72; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v6
73; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v6
74; CHECK-NEXT:    v_mul_hi_u32 v13, v4, v6
75; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v6
76; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
77; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
78; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
79; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
80; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
81; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
82; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
83; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
84; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
85; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
86; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
87; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
88; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
89; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
90; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
91; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
92; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
93; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v4
94; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v4
95; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
96; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v5
97; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v5
98; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v5
99; CHECK-NEXT:    v_mul_hi_u32 v5, v1, v5
100; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
101; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
102; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
103; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
104; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
105; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
106; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
107; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
108; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
109; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
110; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
111; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
112; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
113; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
114; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v4
115; CHECK-NEXT:    v_mul_hi_u32 v4, v2, v4
116; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
117; CHECK-NEXT:    v_mul_lo_u32 v5, v2, v5
118; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
119; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
120; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, v0, v7
121; CHECK-NEXT:    v_subb_u32_e64 v6, s[4:5], v1, v4, vcc
122; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v4
123; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v2
124; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
125; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
126; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
127; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
128; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v3
129; CHECK-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
130; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v5, v2
131; CHECK-NEXT:    v_subbrev_u32_e64 v8, s[4:5], 0, v1, vcc
132; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v2
133; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
134; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
135; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v3
136; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
137; CHECK-NEXT:    v_sub_i32_e32 v11, vcc, v7, v2
138; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
139; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v3
140; CHECK-NEXT:    v_cndmask_b32_e32 v3, v10, v9, vcc
141; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
142; CHECK-NEXT:    v_cndmask_b32_e32 v3, v7, v11, vcc
143; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
144; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
145; CHECK-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
146; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v1, vcc
147; CHECK-NEXT:  BB0_2: ; %Flow
148; CHECK-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
149; CHECK-NEXT:    s_xor_b64 exec, exec, s[4:5]
150; CHECK-NEXT:    s_cbranch_execz BB0_4
151; CHECK-NEXT:  ; %bb.3:
152; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v2
153; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
154; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
155; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
156; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
157; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v1
158; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
159; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
160; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
161; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
162; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
163; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
164; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
165; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
166; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
167; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
168; CHECK-NEXT:    v_cndmask_b32_e32 v4, v0, v1, vcc
169; CHECK-NEXT:    v_mov_b32_e32 v5, 0
170; CHECK-NEXT:  BB0_4:
171; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
172; CHECK-NEXT:    v_mov_b32_e32 v0, v4
173; CHECK-NEXT:    v_mov_b32_e32 v1, v5
174; CHECK-NEXT:    s_setpc_b64 s[30:31]
; The whole test input is this one urem; everything asserted above is its expansion.
175  %result = urem i64 %num, %den
176  ret i64 %result
177}
178
179; FIXME: This is a workaround for not handling the uniform VGPR case.
; Declared for s_urem_i64, which passes the 32-bit pieces of its urem result
; through this intrinsic before returning them.
180declare i32 @llvm.amdgcn.readfirstlane(i32)
181
; s_urem_i64: the same 64-bit urem, but in an amdgpu_ps function whose operands
; are both marked inreg. The assertions use the common prefix shared by both
; RUN lines, so both configurations are expected to emit identical code.
; Shape of the expected output:
;   %bb.0 - ORs s[0:1] with s[2:3], masks with s[6:7] = {0, -1}, and compares
;           against zero to choose a path with a scalar branch.
;   %bb.1 - full 64-bit expansion, with the arithmetic done in VGPRs.
;   %bb.3 - 32-bit expansion that reads only s0 and s2.
;   BB1_4 - a single v_readfirstlane_b32 into s0, then s0 is copied into s1.
; The assertion lines are autogenerated; regenerate them rather than editing by hand.
182define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
183; CHECK-LABEL: s_urem_i64:
184; CHECK:       ; %bb.0:
185; CHECK-NEXT:    s_or_b64 s[4:5], s[0:1], s[2:3]
186; CHECK-NEXT:    s_mov_b32 s6, 0
187; CHECK-NEXT:    s_mov_b32 s7, -1
188; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
189; CHECK-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], 0
190; CHECK-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, 1
191; CHECK-NEXT:    s_xor_b64 vcc, s[4:5], s[6:7]
192; CHECK-NEXT:    s_mov_b32 s4, 1
193; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
194; CHECK-NEXT:    s_cbranch_vccz BB1_2
195; CHECK-NEXT:  ; %bb.1:
196; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
197; CHECK-NEXT:    v_mov_b32_e32 v1, s3
198; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s3
199; CHECK-NEXT:    s_sub_u32 s6, 0, s2
200; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
201; CHECK-NEXT:    v_mov_b32_e32 v3, s1
202; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v2
203; CHECK-NEXT:    s_and_b32 s4, s4, 1
204; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
205; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
206; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
207; CHECK-NEXT:    s_subb_u32 s7, 0, s3
208; CHECK-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v0
209; CHECK-NEXT:    v_trunc_f32_e32 v2, v2
210; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
211; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
212; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
213; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v2
214; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v0
215; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
216; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
217; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
218; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v5
219; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v5
220; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v5
221; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
222; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v4
223; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v4
224; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
225; CHECK-NEXT:    v_mul_hi_u32 v4, v2, v4
226; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
227; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
228; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
229; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
230; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
231; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
232; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
233; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
234; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
235; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
236; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
237; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
238; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
239; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
240; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
241; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v2, v4, vcc
242; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v4
243; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v0
244; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
245; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
246; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
247; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
248; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
249; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
250; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
251; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
252; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v6
253; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
254; CHECK-NEXT:    v_mul_hi_u32 v11, v0, v6
255; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
256; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
257; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
258; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
259; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
260; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
261; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
262; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
263; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
264; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
265; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
266; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
267; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
268; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
269; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
270; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
271; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
272; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
273; CHECK-NEXT:    v_mul_lo_u32 v4, s1, v0
274; CHECK-NEXT:    v_mul_hi_u32 v5, s0, v0
275; CHECK-NEXT:    v_mul_hi_u32 v0, s1, v0
276; CHECK-NEXT:    v_mul_lo_u32 v6, s0, v2
277; CHECK-NEXT:    v_mul_lo_u32 v7, s1, v2
278; CHECK-NEXT:    v_mul_hi_u32 v8, s0, v2
279; CHECK-NEXT:    v_mul_hi_u32 v2, s1, v2
280; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
281; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
282; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
283; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
284; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
285; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
286; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
287; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
288; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
289; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
290; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
291; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
292; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
293; CHECK-NEXT:    v_mul_lo_u32 v5, s2, v0
294; CHECK-NEXT:    v_mul_lo_u32 v6, s3, v0
295; CHECK-NEXT:    v_mul_hi_u32 v0, s2, v0
296; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
297; CHECK-NEXT:    v_mul_lo_u32 v2, s2, v2
298; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
299; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
300; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, s0, v5
301; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v0, vcc
302; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], s1, v0
303; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v2
304; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
305; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s3, v3
306; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
307; CHECK-NEXT:    v_subb_u32_e32 v0, vcc, v0, v1, vcc
308; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v3
309; CHECK-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
310; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s2, v2
311; CHECK-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
312; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v3
313; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
314; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, s2, v3
315; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s3, v0
316; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
317; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v0
318; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
319; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
320; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v5, vcc
321; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
322; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
323; CHECK-NEXT:    s_mov_b32 s4, 0
324; CHECK-NEXT:  BB1_2: ; %Flow
325; CHECK-NEXT:    s_and_b32 s1, s4, 1
326; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
327; CHECK-NEXT:    s_cbranch_scc0 BB1_4
328; CHECK-NEXT:  ; %bb.3:
329; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
330; CHECK-NEXT:    s_sub_i32 s1, 0, s2
331; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
332; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
333; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
334; CHECK-NEXT:    v_mul_lo_u32 v1, s1, v0
335; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
336; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
337; CHECK-NEXT:    v_mul_hi_u32 v0, s0, v0
338; CHECK-NEXT:    v_mul_lo_u32 v0, v0, s2
339; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
340; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s2, v0
341; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
342; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
343; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s2, v0
344; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
345; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
346; CHECK-NEXT:  BB1_4:
347; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
348; CHECK-NEXT:    s_mov_b32 s1, s0
349; CHECK-NEXT:    ; return to shader part epilog
350  %result = urem i64 %num, %den
; Split the 64-bit remainder into two i32 elements so each can be passed
; through readfirstlane individually.
351  %cast = bitcast i64 %result to <2 x i32>
352  %elt.0 = extractelement <2 x i32> %cast, i32 0
353  %elt.1 = extractelement <2 x i32> %cast, i32 1
354  %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
355  %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
356  %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
; NOTE(review): element 1 is filled with %res.0, so %res.1 is never used and
; the element-1 half of the remainder does not reach the return value; the
; asserted output matches this (one v_readfirstlane_b32, then s1 copied from
; s0). This looks like it was meant to be %res.1 - TODO confirm. Changing it
; requires regenerating the assertions above with update_llc_test_checks.py.
357  %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
358  %cast.back = bitcast <2 x i32> %ins.1 to i64
359  ret i64 %cast.back
360}
361
362define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
363; GISEL-LABEL: v_urem_v2i64:
364; GISEL:       ; %bb.0:
365; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
366; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
367; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
368; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
369; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
370; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
371; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
372; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
373; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
374; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
375; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
376; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
377; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
378; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
379; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
380; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
381; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
382; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
383; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
384; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
385; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v13
386; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v12
387; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
388; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
389; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
390; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
391; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
392; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
393; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
394; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
395; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
396; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
397; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
398; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
399; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
400; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
401; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
402; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
403; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
404; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
405; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
406; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v9, v13, vcc
407; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
408; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v8
409; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
410; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v12
411; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v8
412; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
413; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
414; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v13
415; GISEL-NEXT:    v_mul_lo_u32 v14, v8, v10
416; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v13
417; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
418; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
419; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
420; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
421; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
422; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v10
423; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v13
424; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v10
425; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
426; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
427; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
428; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
429; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
430; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
431; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
432; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
433; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v10
434; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
435; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
436; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
437; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, 0, v9, s[4:5]
438; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v8
439; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
440; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v8
441; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
442; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
443; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
444; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
445; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
446; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
447; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
448; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
449; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
450; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
451; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
452; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
453; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
454; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
455; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
456; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
457; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
458; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
459; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v8
460; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
461; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v9
462; GISEL-NEXT:    v_mul_hi_u32 v8, v4, v8
463; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
464; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
465; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
466; GISEL-NEXT:    v_subb_u32_e64 v9, s[4:5], v1, v8, vcc
467; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v8
468; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v5
469; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
470; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
471; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
472; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v5
473; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
474; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], v0, v4
475; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
476; GISEL-NEXT:    v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5]
477; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v5
478; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
479; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v4
480; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
481; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v5
482; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
483; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v10, v4
484; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5]
485; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
486; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
487; GISEL-NEXT:    v_cndmask_b32_e32 v4, v10, v4, vcc
488; GISEL-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
489; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
490; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
491; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
492; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v6
493; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v7
494; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
495; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
496; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
497; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
498; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
499; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
500; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
501; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
502; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
503; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
504; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
505; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
506; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v5
507; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v4
508; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
509; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
510; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v10
511; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v11
512; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v10
513; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
514; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
515; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
516; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
517; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
518; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v11
519; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
520; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
521; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
522; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
523; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
524; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
525; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
526; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
527; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
528; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
529; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
530; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
531; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
532; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v11, vcc
533; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v11
534; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v4
535; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v4
536; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v10
537; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
538; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
539; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
540; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v11
541; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
542; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v11
543; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
544; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
545; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
546; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
547; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
548; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
549; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v11
550; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
551; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
552; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
553; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
554; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
555; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
556; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
557; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
558; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
559; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
560; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
561; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v9
562; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
563; GISEL-NEXT:    v_addc_u32_e64 v5, vcc, 0, v5, s[4:5]
564; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
565; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
566; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
567; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
568; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
569; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
570; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
571; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
572; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
573; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
574; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v5
575; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
576; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
577; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
578; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
579; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
580; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
581; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
582; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
583; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
584; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
585; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
586; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v4
587; GISEL-NEXT:    v_mul_lo_u32 v5, v6, v5
588; GISEL-NEXT:    v_mul_hi_u32 v4, v6, v4
589; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
590; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
591; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
592; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v3, v4, vcc
593; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
594; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v7
595; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
596; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
597; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
598; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v7
599; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
600; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v2, v6
601; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
602; GISEL-NEXT:    v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5]
603; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v7
604; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
605; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v6
606; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
607; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v7
608; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
609; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v8, v6
610; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5]
611; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
612; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
613; GISEL-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
614; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
615; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
616; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
617; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
618; GISEL-NEXT:    s_setpc_b64 s[30:31]
619;
620; CGP-LABEL: v_urem_v2i64:
621; CGP:       ; %bb.0:
622; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
623; CGP-NEXT:    v_mov_b32_e32 v8, v0
624; CGP-NEXT:    v_mov_b32_e32 v9, v1
625; CGP-NEXT:    v_or_b32_e32 v1, v9, v5
626; CGP-NEXT:    v_mov_b32_e32 v0, 0
627; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
628; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
629; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
630; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
631; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
632; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
633; CGP-NEXT:    s_cbranch_execz BB2_2
634; CGP-NEXT:  ; %bb.1:
635; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
636; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v5
637; CGP-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
638; CGP-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
639; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
640; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
641; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
642; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
643; CGP-NEXT:    v_trunc_f32_e32 v1, v1
644; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
645; CGP-NEXT:    v_cvt_u32_f32_e32 v1, v1
646; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
647; CGP-NEXT:    v_mul_lo_u32 v12, v10, v1
648; CGP-NEXT:    v_mul_lo_u32 v13, v10, v0
649; CGP-NEXT:    v_mul_lo_u32 v14, v11, v0
650; CGP-NEXT:    v_mul_hi_u32 v15, v10, v0
651; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
652; CGP-NEXT:    v_mul_lo_u32 v14, v1, v13
653; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
654; CGP-NEXT:    v_mul_hi_u32 v13, v1, v13
655; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
656; CGP-NEXT:    v_mul_lo_u32 v15, v0, v12
657; CGP-NEXT:    v_mul_lo_u32 v17, v1, v12
658; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
659; CGP-NEXT:    v_mul_hi_u32 v12, v1, v12
660; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
661; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
662; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
663; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
664; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
665; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
666; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
667; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
668; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
669; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
670; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
671; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
672; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
673; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
674; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
675; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], v1, v12, vcc
676; CGP-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
677; CGP-NEXT:    v_mul_lo_u32 v12, v10, v0
678; CGP-NEXT:    v_mul_lo_u32 v11, v11, v0
679; CGP-NEXT:    v_mul_hi_u32 v14, v10, v0
680; CGP-NEXT:    v_mul_lo_u32 v10, v10, v13
681; CGP-NEXT:    v_mul_lo_u32 v15, v13, v12
682; CGP-NEXT:    v_mul_hi_u32 v16, v0, v12
683; CGP-NEXT:    v_mul_hi_u32 v12, v13, v12
684; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
685; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v14
686; CGP-NEXT:    v_mul_lo_u32 v11, v0, v10
687; CGP-NEXT:    v_mul_lo_u32 v14, v13, v10
688; CGP-NEXT:    v_mul_hi_u32 v17, v0, v10
689; CGP-NEXT:    v_mul_hi_u32 v10, v13, v10
690; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v15, v11
691; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
692; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
693; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
694; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
695; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
696; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
697; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
698; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
699; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v15
700; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
701; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
702; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
703; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
704; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v10, vcc
705; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v11
706; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
707; CGP-NEXT:    v_mul_lo_u32 v10, v9, v0
708; CGP-NEXT:    v_mul_hi_u32 v11, v8, v0
709; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
710; CGP-NEXT:    v_mul_lo_u32 v12, v8, v1
711; CGP-NEXT:    v_mul_lo_u32 v13, v9, v1
712; CGP-NEXT:    v_mul_hi_u32 v14, v8, v1
713; CGP-NEXT:    v_mul_hi_u32 v1, v9, v1
714; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
715; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
716; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
717; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
718; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
719; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
720; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
721; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
722; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
723; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
724; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
725; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
726; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
727; CGP-NEXT:    v_mul_lo_u32 v11, v4, v0
728; CGP-NEXT:    v_mul_lo_u32 v12, v5, v0
729; CGP-NEXT:    v_mul_hi_u32 v0, v4, v0
730; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v10
731; CGP-NEXT:    v_mul_lo_u32 v1, v4, v1
732; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
733; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
734; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v8, v11
735; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v9, v0, vcc
736; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v9, v0
737; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v4
738; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
739; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
740; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
741; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
742; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v5
743; CGP-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
744; CGP-NEXT:    v_sub_i32_e32 v11, vcc, v1, v4
745; CGP-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v0, vcc
746; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v4
747; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
748; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
749; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v12, v5
750; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
751; CGP-NEXT:    v_sub_i32_e32 v15, vcc, v11, v4
752; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
753; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v12, v5
754; CGP-NEXT:    v_cndmask_b32_e32 v5, v14, v13, vcc
755; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
756; CGP-NEXT:    v_cndmask_b32_e32 v5, v11, v15, vcc
757; CGP-NEXT:    v_cndmask_b32_e32 v11, v12, v0, vcc
758; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
759; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v5, vcc
760; CGP-NEXT:    v_cndmask_b32_e32 v1, v10, v11, vcc
761; CGP-NEXT:  BB2_2: ; %Flow2
762; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
763; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
764; CGP-NEXT:    s_cbranch_execz BB2_4
765; CGP-NEXT:  ; %bb.3:
766; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
767; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v4
768; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
769; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
770; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
771; CGP-NEXT:    v_mul_lo_u32 v1, v1, v0
772; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
773; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
774; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
775; CGP-NEXT:    v_mul_lo_u32 v0, v0, v4
776; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v8, v0
777; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
778; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
779; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
780; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
781; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
782; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
783; CGP-NEXT:    v_mov_b32_e32 v1, 0
784; CGP-NEXT:  BB2_4:
785; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
786; CGP-NEXT:    v_or_b32_e32 v5, v3, v7
787; CGP-NEXT:    v_mov_b32_e32 v4, 0
788; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
789; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
790; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
791; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
792; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
793; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
794; CGP-NEXT:    s_cbranch_execz BB2_6
795; CGP-NEXT:  ; %bb.5:
796; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
797; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v7
798; CGP-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
799; CGP-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
800; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
801; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
802; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
803; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
804; CGP-NEXT:    v_trunc_f32_e32 v5, v5
805; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
806; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
807; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
808; CGP-NEXT:    v_mul_lo_u32 v10, v8, v5
809; CGP-NEXT:    v_mul_lo_u32 v11, v8, v4
810; CGP-NEXT:    v_mul_lo_u32 v12, v9, v4
811; CGP-NEXT:    v_mul_hi_u32 v13, v8, v4
812; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
813; CGP-NEXT:    v_mul_lo_u32 v12, v5, v11
814; CGP-NEXT:    v_mul_hi_u32 v14, v4, v11
815; CGP-NEXT:    v_mul_hi_u32 v11, v5, v11
816; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
817; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
818; CGP-NEXT:    v_mul_lo_u32 v15, v5, v10
819; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
820; CGP-NEXT:    v_mul_hi_u32 v10, v5, v10
821; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
822; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
823; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
824; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
825; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
826; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
827; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
828; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
829; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
830; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
831; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
832; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
833; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
834; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
835; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
836; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v5, v10, vcc
837; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v10
838; CGP-NEXT:    v_mul_lo_u32 v10, v8, v4
839; CGP-NEXT:    v_mul_lo_u32 v9, v9, v4
840; CGP-NEXT:    v_mul_hi_u32 v12, v8, v4
841; CGP-NEXT:    v_mul_lo_u32 v8, v8, v11
842; CGP-NEXT:    v_mul_lo_u32 v13, v11, v10
843; CGP-NEXT:    v_mul_hi_u32 v14, v4, v10
844; CGP-NEXT:    v_mul_hi_u32 v10, v11, v10
845; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
846; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
847; CGP-NEXT:    v_mul_lo_u32 v9, v4, v8
848; CGP-NEXT:    v_mul_lo_u32 v12, v11, v8
849; CGP-NEXT:    v_mul_hi_u32 v15, v4, v8
850; CGP-NEXT:    v_mul_hi_u32 v8, v11, v8
851; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v13, v9
852; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
853; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v10
854; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
855; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v14
856; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
857; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v15
858; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
859; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
860; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v13
861; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v9
862; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
863; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
864; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
865; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
866; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
867; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
868; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
869; CGP-NEXT:    v_mul_hi_u32 v9, v2, v4
870; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
871; CGP-NEXT:    v_mul_lo_u32 v10, v2, v5
872; CGP-NEXT:    v_mul_lo_u32 v11, v3, v5
873; CGP-NEXT:    v_mul_hi_u32 v12, v2, v5
874; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
875; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
876; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
877; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
878; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
879; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
880; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
881; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
882; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
883; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
884; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
885; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
886; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
887; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
888; CGP-NEXT:    v_mul_lo_u32 v9, v6, v4
889; CGP-NEXT:    v_mul_lo_u32 v10, v7, v4
890; CGP-NEXT:    v_mul_hi_u32 v4, v6, v4
891; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
892; CGP-NEXT:    v_mul_lo_u32 v5, v6, v5
893; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
894; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
895; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v2, v9
896; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v4, vcc
897; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
898; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v6
899; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
900; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v7
901; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
902; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
903; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v7
904; CGP-NEXT:    v_cndmask_b32_e32 v4, v9, v4, vcc
905; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v5, v6
906; CGP-NEXT:    v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc
907; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v6
908; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
909; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
910; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v7
911; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
912; CGP-NEXT:    v_sub_i32_e32 v13, vcc, v9, v6
913; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
914; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v7
915; CGP-NEXT:    v_cndmask_b32_e32 v7, v12, v11, vcc
916; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
917; CGP-NEXT:    v_cndmask_b32_e32 v7, v9, v13, vcc
918; CGP-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
919; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
920; CGP-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
921; CGP-NEXT:    v_cndmask_b32_e32 v5, v8, v3, vcc
922; CGP-NEXT:  BB2_6: ; %Flow
923; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
924; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
925; CGP-NEXT:    s_cbranch_execz BB2_8
926; CGP-NEXT:  ; %bb.7:
927; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v6
928; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v6
929; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
930; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
931; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
932; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
933; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
934; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
935; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
936; CGP-NEXT:    v_mul_lo_u32 v3, v3, v6
937; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
938; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v6
939; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
940; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
941; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v6
942; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
943; CGP-NEXT:    v_cndmask_b32_e32 v4, v2, v3, vcc
944; CGP-NEXT:    v_mov_b32_e32 v5, 0
945; CGP-NEXT:  BB2_8:
946; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
947; CGP-NEXT:    v_mov_b32_e32 v2, v4
948; CGP-NEXT:    v_mov_b32_e32 v3, v5
949; CGP-NEXT:    s_setpc_b64 s[30:31]
950  %result = urem <2 x i64> %num, %den
951  ret <2 x i64> %result
952}
953
; Test: unsigned remainder of a scalar i64 by the power-of-two constant 4096
; (it shows up as 0x1000 in the checks below).
;
; This function uses the shared CHECK prefix, so both RUN lines (idiv expansion
; in AMDGPUCodeGenPrepare disabled and enabled) are expected to emit the same
; code.
;
; The recorded output is the generic reciprocal-based 64-bit expansion
; (v_rcp_iflag_f32 followed by mul_lo/mul_hi refinement and conditional
; subtracts); no mask-based shortcut for the power-of-two divisor appears in
; the checks.
;
; NOTE(review): the CHECK lines are autogenerated (see the note at the top of
; the file) - regenerate them with utils/update_llc_test_checks.py rather than
; editing by hand.
954define i64 @v_urem_i64_pow2k_denom(i64 %num) {
955; CHECK-LABEL: v_urem_i64_pow2k_denom:
956; CHECK:       ; %bb.0:
957; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
958; CHECK-NEXT:    s_movk_i32 s6, 0x1000
959; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0
960; CHECK-NEXT:    s_mov_b32 s7, 0xfffff000
961; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
962; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v2
963; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v3
964; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
965; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
966; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
967; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
968; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
969; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
970; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v3
971; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
972; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
973; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
974; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
975; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v5
976; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
977; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
978; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
979; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
980; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v4
981; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
982; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
983; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
984; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
985; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
986; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
987; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
988; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
989; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
990; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
991; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
992; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
993; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
994; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
995; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
996; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
997; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
998; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v3, v4, vcc
999; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v4
1000; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v2
1001; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
1002; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
1003; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v5
1004; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
1005; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
1006; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
1007; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
1008; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
1009; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v6
1010; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
1011; CHECK-NEXT:    v_mul_hi_u32 v11, v2, v6
1012; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
1013; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
1014; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
1015; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
1016; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
1017; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
1018; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
1019; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
1020; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
1021; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
1022; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
1023; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
1024; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
1025; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
1026; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
1027; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
1028; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1029; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1030; CHECK-NEXT:    v_mul_lo_u32 v4, v1, v2
1031; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v2
1032; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
1033; CHECK-NEXT:    v_mul_lo_u32 v6, v0, v3
1034; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v3
1035; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v3
1036; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
1037; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
1038; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1039; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
1040; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1041; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
1042; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1043; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
1044; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1045; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1046; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1047; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1048; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1049; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
1050; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
1051; CHECK-NEXT:    v_mul_lo_u32 v6, 0, v2
1052; CHECK-NEXT:    v_mul_hi_u32 v2, s6, v2
1053; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
1054; CHECK-NEXT:    v_mul_lo_u32 v3, s6, v3
1055; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
1056; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1057; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
1058; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v1, v2, vcc
1059; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
1060; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
1061; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
1062; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v3
1063; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1064; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1065; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1066; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
1067; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s6, v0
1068; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1069; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v4
1070; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
1071; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
1072; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
1073; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, s6, v4
1074; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
1075; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1076; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
1077; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
1078; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
1079; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
1080; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
1081; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1082; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
1083; CHECK-NEXT:    s_setpc_b64 s[30:31]
1084  %result = urem i64 %num, 4096
1085  ret i64 %result
1086}
1087
1088define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) {
1089; GISEL-LABEL: v_urem_v2i64_pow2k_denom:
1090; GISEL:       ; %bb.0:
1091; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1092; GISEL-NEXT:    s_movk_i32 s10, 0x1000
1093; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s10
1094; GISEL-NEXT:    s_sub_u32 s8, 0, s10
1095; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
1096; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, 0
1097; GISEL-NEXT:    v_mov_b32_e32 v6, v4
1098; GISEL-NEXT:    s_and_b32 s4, s4, 1
1099; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
1100; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
1101; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
1102; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
1103; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
1104; GISEL-NEXT:    s_subb_u32 s9, 0, 0
1105; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
1106; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
1107; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
1108; GISEL-NEXT:    s_sub_u32 s11, 0, s10
1109; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
1110; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
1111; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
1112; GISEL-NEXT:    s_and_b32 s4, s4, 1
1113; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
1114; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
1115; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
1116; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
1117; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
1118; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
1119; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
1120; GISEL-NEXT:    s_subb_u32 s6, 0, 0
1121; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v6
1122; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
1123; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v7
1124; GISEL-NEXT:    v_mul_lo_u32 v10, s11, v4
1125; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
1126; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
1127; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
1128; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
1129; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
1130; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
1131; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
1132; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v10
1133; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
1134; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
1135; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v13
1136; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v13
1137; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v13
1138; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1139; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1140; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
1141; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v8
1142; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
1143; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
1144; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
1145; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
1146; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
1147; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1148; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v9
1149; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v9
1150; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
1151; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
1152; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1153; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
1154; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1155; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
1156; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1157; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
1158; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1159; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
1160; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1161; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1162; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
1163; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1164; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1165; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
1166; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
1167; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1168; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1169; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1170; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1171; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1172; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1173; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
1174; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
1175; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
1176; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
1177; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
1178; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
1179; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v4
1180; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
1181; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
1182; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
1183; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
1184; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
1185; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v5
1186; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
1187; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
1188; GISEL-NEXT:    v_mul_lo_u32 v16, s11, v10
1189; GISEL-NEXT:    v_mul_lo_u32 v17, v10, v8
1190; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
1191; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
1192; GISEL-NEXT:    v_mul_lo_u32 v19, s8, v13
1193; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
1194; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v9
1195; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
1196; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v9
1197; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
1198; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
1199; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
1200; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v11
1201; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v12
1202; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
1203; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
1204; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
1205; GISEL-NEXT:    v_mul_lo_u32 v15, v10, v11
1206; GISEL-NEXT:    v_mul_hi_u32 v19, v4, v11
1207; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v11
1208; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v12
1209; GISEL-NEXT:    v_mul_hi_u32 v13, v13, v12
1210; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v12
1211; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
1212; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
1213; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
1214; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
1215; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
1216; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
1217; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
1218; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
1219; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
1220; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
1221; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
1222; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
1223; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
1224; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
1225; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
1226; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
1227; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
1228; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
1229; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
1230; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
1231; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
1232; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
1233; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
1234; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
1235; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
1236; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
1237; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
1238; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1239; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
1240; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
1241; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
1242; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
1243; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1244; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
1245; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
1246; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v5
1247; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
1248; GISEL-NEXT:    v_mul_lo_u32 v12, v2, v6
1249; GISEL-NEXT:    v_mul_lo_u32 v13, v3, v6
1250; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v6
1251; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
1252; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v7
1253; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v7
1254; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v7
1255; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
1256; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1257; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1258; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
1259; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1260; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1261; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1262; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
1263; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1264; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1265; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1266; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
1267; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1268; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1269; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1270; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
1271; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1272; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
1273; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
1274; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
1275; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
1276; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1277; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1278; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1279; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1280; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
1281; GISEL-NEXT:    v_mul_lo_u32 v10, s10, v4
1282; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v4
1283; GISEL-NEXT:    v_mul_hi_u32 v4, s10, v4
1284; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
1285; GISEL-NEXT:    v_mul_lo_u32 v11, s10, v5
1286; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v5
1287; GISEL-NEXT:    v_mul_hi_u32 v5, s10, v5
1288; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1289; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
1290; GISEL-NEXT:    v_mul_lo_u32 v6, s10, v6
1291; GISEL-NEXT:    v_mul_lo_u32 v7, s10, v7
1292; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
1293; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
1294; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1295; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1296; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
1297; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
1298; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
1299; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
1300; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1301; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
1302; GISEL-NEXT:    v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
1303; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v5
1304; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v0
1305; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
1306; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
1307; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
1308; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1309; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v7
1310; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
1311; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
1312; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
1313; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
1314; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, s10, v2
1315; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1316; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v8
1317; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
1318; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
1319; GISEL-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
1320; GISEL-NEXT:    v_subrev_i32_e32 v9, vcc, s10, v0
1321; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1322; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v9
1323; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
1324; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
1325; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
1326; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, s10, v8
1327; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
1328; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
1329; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
1330; GISEL-NEXT:    v_subrev_i32_e32 v16, vcc, s10, v9
1331; GISEL-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
1332; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1333; GISEL-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
1334; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1335; GISEL-NEXT:    v_cndmask_b32_e32 v11, v15, v11, vcc
1336; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
1337; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc
1338; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
1339; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
1340; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
1341; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
1342; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
1343; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
1344; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
1345; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v9, s[4:5]
1346; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
1347; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
1348; GISEL-NEXT:    s_setpc_b64 s[30:31]
1349;
1350; CGP-LABEL: v_urem_v2i64_pow2k_denom:
1351; CGP:       ; %bb.0:
1352; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1353; CGP-NEXT:    s_movk_i32 s10, 0x1000
1354; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0
1355; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s10
1356; CGP-NEXT:    s_mov_b32 s8, 0xfffff000
1357; CGP-NEXT:    v_mov_b32_e32 v6, v5
1358; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
1359; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v4
1360; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v5
1361; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
1362; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
1363; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
1364; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
1365; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
1366; CGP-NEXT:    v_trunc_f32_e32 v6, v6
1367; CGP-NEXT:    v_trunc_f32_e32 v7, v7
1368; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
1369; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
1370; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
1371; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
1372; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
1373; CGP-NEXT:    v_mul_lo_u32 v8, s8, v6
1374; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
1375; CGP-NEXT:    v_mul_lo_u32 v9, s8, v7
1376; CGP-NEXT:    v_mul_lo_u32 v10, s8, v4
1377; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
1378; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
1379; CGP-NEXT:    v_mul_lo_u32 v13, s8, v5
1380; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
1381; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
1382; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
1383; CGP-NEXT:    v_mul_lo_u32 v11, v6, v10
1384; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
1385; CGP-NEXT:    v_mul_hi_u32 v10, v6, v10
1386; CGP-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
1387; CGP-NEXT:    v_mul_lo_u32 v14, v7, v13
1388; CGP-NEXT:    v_mul_hi_u32 v17, v5, v13
1389; CGP-NEXT:    v_mul_hi_u32 v13, v7, v13
1390; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1391; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1392; CGP-NEXT:    v_mul_lo_u32 v12, v4, v8
1393; CGP-NEXT:    v_mul_lo_u32 v15, v6, v8
1394; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
1395; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
1396; CGP-NEXT:    v_mul_lo_u32 v19, v5, v9
1397; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
1398; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
1399; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1400; CGP-NEXT:    v_mul_lo_u32 v14, v7, v9
1401; CGP-NEXT:    v_mul_hi_u32 v17, v5, v9
1402; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
1403; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
1404; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1405; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
1406; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1407; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
1408; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1409; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
1410; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1411; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
1412; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1413; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1414; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
1415; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1416; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1417; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
1418; CGP-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
1419; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1420; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1421; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1422; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1423; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1424; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1425; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
1426; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
1427; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
1428; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
1429; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
1430; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
1431; CGP-NEXT:    v_mul_lo_u32 v8, s8, v4
1432; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
1433; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
1434; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
1435; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
1436; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
1437; CGP-NEXT:    v_mul_lo_u32 v9, s8, v5
1438; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
1439; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
1440; CGP-NEXT:    v_mul_lo_u32 v16, s8, v10
1441; CGP-NEXT:    v_mul_lo_u32 v17, v10, v8
1442; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
1443; CGP-NEXT:    v_mul_hi_u32 v8, v10, v8
1444; CGP-NEXT:    v_mul_lo_u32 v19, s8, v13
1445; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
1446; CGP-NEXT:    v_mul_lo_u32 v16, v13, v9
1447; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
1448; CGP-NEXT:    v_mul_hi_u32 v19, v5, v9
1449; CGP-NEXT:    v_mul_hi_u32 v9, v13, v9
1450; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
1451; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
1452; CGP-NEXT:    v_mul_lo_u32 v14, v4, v11
1453; CGP-NEXT:    v_mul_lo_u32 v15, v5, v12
1454; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
1455; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
1456; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
1457; CGP-NEXT:    v_mul_lo_u32 v15, v10, v11
1458; CGP-NEXT:    v_mul_hi_u32 v19, v4, v11
1459; CGP-NEXT:    v_mul_hi_u32 v10, v10, v11
1460; CGP-NEXT:    v_mul_lo_u32 v11, v13, v12
1461; CGP-NEXT:    v_mul_hi_u32 v13, v13, v12
1462; CGP-NEXT:    v_mul_hi_u32 v12, v5, v12
1463; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
1464; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
1465; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
1466; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
1467; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
1468; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
1469; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
1470; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
1471; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
1472; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
1473; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
1474; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
1475; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
1476; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
1477; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
1478; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
1479; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
1480; CGP-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
1481; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
1482; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
1483; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
1484; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
1485; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
1486; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
1487; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
1488; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
1489; CGP-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
1490; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1491; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
1492; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
1493; CGP-NEXT:    v_mul_hi_u32 v10, v2, v4
1494; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
1495; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1496; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
1497; CGP-NEXT:    v_mul_lo_u32 v9, v1, v5
1498; CGP-NEXT:    v_mul_hi_u32 v11, v0, v5
1499; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
1500; CGP-NEXT:    v_mul_lo_u32 v12, v2, v6
1501; CGP-NEXT:    v_mul_lo_u32 v13, v3, v6
1502; CGP-NEXT:    v_mul_hi_u32 v14, v2, v6
1503; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
1504; CGP-NEXT:    v_mul_lo_u32 v15, v0, v7
1505; CGP-NEXT:    v_mul_lo_u32 v16, v1, v7
1506; CGP-NEXT:    v_mul_hi_u32 v17, v0, v7
1507; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
1508; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1509; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1510; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
1511; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1512; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1513; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1514; CGP-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
1515; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1516; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1517; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1518; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
1519; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1520; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1521; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1522; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
1523; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1524; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
1525; CGP-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
1526; CGP-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
1527; CGP-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
1528; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1529; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1530; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1531; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1532; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
1533; CGP-NEXT:    v_mul_lo_u32 v10, s10, v4
1534; CGP-NEXT:    v_mul_lo_u32 v12, 0, v4
1535; CGP-NEXT:    v_mul_hi_u32 v4, s10, v4
1536; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
1537; CGP-NEXT:    v_mul_lo_u32 v11, s10, v5
1538; CGP-NEXT:    v_mul_lo_u32 v13, 0, v5
1539; CGP-NEXT:    v_mul_hi_u32 v5, s10, v5
1540; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1541; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
1542; CGP-NEXT:    v_mul_lo_u32 v6, s10, v6
1543; CGP-NEXT:    v_mul_lo_u32 v7, s10, v7
1544; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
1545; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
1546; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1547; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1548; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
1549; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
1550; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
1551; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
1552; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1553; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
1554; CGP-NEXT:    v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
1555; CGP-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v5
1556; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v0
1557; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
1558; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
1559; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
1560; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1561; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v7
1562; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
1563; CGP-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
1564; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
1565; CGP-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
1566; CGP-NEXT:    v_subrev_i32_e32 v8, vcc, s10, v2
1567; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1568; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v8
1569; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
1570; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
1571; CGP-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
1572; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s10, v0
1573; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1574; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v9
1575; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
1576; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
1577; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
1578; CGP-NEXT:    v_subrev_i32_e32 v13, vcc, s10, v8
1579; CGP-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
1580; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
1581; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
1582; CGP-NEXT:    v_subrev_i32_e32 v16, vcc, s10, v9
1583; CGP-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
1584; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1585; CGP-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
1586; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1587; CGP-NEXT:    v_cndmask_b32_e32 v11, v15, v11, vcc
1588; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
1589; CGP-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc
1590; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
1591; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
1592; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
1593; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
1594; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
1595; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
1596; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
1597; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v9, s[4:5]
1598; CGP-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
1599; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
1600; CGP-NEXT:    s_setpc_b64 s[30:31]
1601  %result = urem <2 x i64> %num, <i64 4096, i64 4096>
1602  ret <2 x i64> %result
1603}
1604
; Scalar i64 unsigned remainder by the odd constant 1235195 (0x12d8fb).
; Every assertion below uses the shared CHECK prefix, which both RUN lines
; (GISEL and CGP) enable, so the two configurations are expected to emit
; identical code for this function.
; In the expected output, s6 holds the divisor and s7 holds 0xffed2705, the
; 32-bit two's-complement negation of the divisor (0x12d8fb + 0xffed2705 = 2^32).
; The CHECK lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
1605define i64 @v_urem_i64_oddk_denom(i64 %num) {
1606; CHECK-LABEL: v_urem_i64_oddk_denom:
1607; CHECK:       ; %bb.0:
1608; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1609; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
1610; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0
1611; CHECK-NEXT:    s_mov_b32 s7, 0xffed2705
1612; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
1613; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v2
1614; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v3
1615; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
1616; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
1617; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
1618; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
1619; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
1620; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
1621; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v3
1622; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
1623; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
1624; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
1625; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1626; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v5
1627; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
1628; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
1629; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
1630; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
1631; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v4
1632; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
1633; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
1634; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
1635; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1636; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
1637; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1638; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1639; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1640; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
1641; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1642; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
1643; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
1644; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
1645; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1646; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
1647; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
1648; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
1649; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v3, v4, vcc
1650; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v4
1651; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v2
1652; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
1653; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
1654; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v5
1655; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
1656; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
1657; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
1658; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
1659; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
1660; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v6
1661; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
1662; CHECK-NEXT:    v_mul_hi_u32 v11, v2, v6
1663; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
1664; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
1665; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
1666; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
1667; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
1668; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
1669; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
1670; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
1671; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
1672; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
1673; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
1674; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
1675; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
1676; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
1677; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
1678; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
1679; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1680; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1681; CHECK-NEXT:    v_mul_lo_u32 v4, v1, v2
1682; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v2
1683; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
1684; CHECK-NEXT:    v_mul_lo_u32 v6, v0, v3
1685; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v3
1686; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v3
1687; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
1688; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
1689; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1690; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
1691; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1692; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
1693; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1694; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
1695; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1696; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1697; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1698; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1699; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1700; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
1701; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
1702; CHECK-NEXT:    v_mul_lo_u32 v6, 0, v2
1703; CHECK-NEXT:    v_mul_hi_u32 v2, s6, v2
1704; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
1705; CHECK-NEXT:    v_mul_lo_u32 v3, s6, v3
1706; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
1707; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1708; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
1709; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v1, v2, vcc
1710; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
1711; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
1712; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
1713; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v3
1714; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1715; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1716; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1717; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
1718; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s6, v0
1719; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1720; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v4
1721; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
1722; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
1723; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
1724; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, s6, v4
1725; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
1726; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1727; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
1728; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
1729; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
1730; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
1731; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
1732; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1733; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
1734; CHECK-NEXT:    s_setpc_b64 s[30:31]
1735  %result = urem i64 %num, 1235195
1736  ret i64 %result
1737}
1738
1739define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
1740; GISEL-LABEL: v_urem_v2i64_oddk_denom:
1741; GISEL:       ; %bb.0:
1742; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1743; GISEL-NEXT:    s_mov_b32 s10, 0x12d8fb
1744; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s10
1745; GISEL-NEXT:    s_sub_u32 s8, 0, s10
1746; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
1747; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, 0
1748; GISEL-NEXT:    v_mov_b32_e32 v6, v4
1749; GISEL-NEXT:    s_and_b32 s4, s4, 1
1750; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
1751; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
1752; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
1753; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
1754; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
1755; GISEL-NEXT:    s_subb_u32 s9, 0, 0
1756; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
1757; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
1758; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
1759; GISEL-NEXT:    s_sub_u32 s11, 0, s10
1760; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
1761; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
1762; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
1763; GISEL-NEXT:    s_and_b32 s4, s4, 1
1764; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
1765; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
1766; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
1767; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
1768; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
1769; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
1770; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
1771; GISEL-NEXT:    s_subb_u32 s6, 0, 0
1772; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v6
1773; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
1774; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v7
1775; GISEL-NEXT:    v_mul_lo_u32 v10, s11, v4
1776; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
1777; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
1778; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
1779; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
1780; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
1781; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
1782; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
1783; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v10
1784; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
1785; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
1786; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v13
1787; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v13
1788; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v13
1789; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1790; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1791; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
1792; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v8
1793; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
1794; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
1795; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
1796; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
1797; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
1798; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1799; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v9
1800; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v9
1801; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
1802; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
1803; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1804; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
1805; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1806; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
1807; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1808; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
1809; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1810; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
1811; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1812; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1813; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
1814; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1815; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1816; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
1817; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
1818; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1819; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1820; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1821; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1822; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1823; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1824; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
1825; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
1826; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
1827; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
1828; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
1829; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
1830; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v4
1831; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
1832; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
1833; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
1834; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
1835; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
1836; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v5
1837; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
1838; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
1839; GISEL-NEXT:    v_mul_lo_u32 v16, s11, v10
1840; GISEL-NEXT:    v_mul_lo_u32 v17, v10, v8
1841; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
1842; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
1843; GISEL-NEXT:    v_mul_lo_u32 v19, s8, v13
1844; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
1845; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v9
1846; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
1847; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v9
1848; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
1849; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
1850; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
1851; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v11
1852; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v12
1853; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
1854; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
1855; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
1856; GISEL-NEXT:    v_mul_lo_u32 v15, v10, v11
1857; GISEL-NEXT:    v_mul_hi_u32 v19, v4, v11
1858; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v11
1859; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v12
1860; GISEL-NEXT:    v_mul_hi_u32 v13, v13, v12
1861; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v12
1862; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
1863; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
1864; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
1865; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
1866; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
1867; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
1868; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
1869; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
1870; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
1871; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
1872; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
1873; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
1874; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
1875; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
1876; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
1877; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
1878; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
1879; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
1880; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
1881; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
1882; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
1883; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
1884; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
1885; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
1886; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
1887; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
1888; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
1889; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1890; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
1891; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
1892; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
1893; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
1894; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1895; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
1896; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
1897; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v5
1898; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
1899; GISEL-NEXT:    v_mul_lo_u32 v12, v2, v6
1900; GISEL-NEXT:    v_mul_lo_u32 v13, v3, v6
1901; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v6
1902; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
1903; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v7
1904; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v7
1905; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v7
1906; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
1907; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1908; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1909; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
1910; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1911; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1912; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1913; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
1914; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1915; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1916; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1917; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
1918; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1919; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1920; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1921; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
1922; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1923; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
1924; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
1925; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
1926; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
1927; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1928; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1929; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1930; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1931; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
1932; GISEL-NEXT:    v_mul_lo_u32 v10, s10, v4
1933; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v4
1934; GISEL-NEXT:    v_mul_hi_u32 v4, s10, v4
1935; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
1936; GISEL-NEXT:    v_mul_lo_u32 v11, s10, v5
1937; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v5
1938; GISEL-NEXT:    v_mul_hi_u32 v5, s10, v5
1939; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1940; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
1941; GISEL-NEXT:    v_mul_lo_u32 v6, s10, v6
1942; GISEL-NEXT:    v_mul_lo_u32 v7, s10, v7
1943; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
1944; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
1945; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1946; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1947; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
1948; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
1949; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
1950; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
1951; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1952; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
1953; GISEL-NEXT:    v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
1954; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v5
1955; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v0
1956; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
1957; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
1958; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
1959; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1960; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v7
1961; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
1962; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
1963; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
1964; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
1965; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, s10, v2
1966; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1967; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v8
1968; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
1969; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
1970; GISEL-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
1971; GISEL-NEXT:    v_subrev_i32_e32 v9, vcc, s10, v0
1972; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1973; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v9
1974; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
1975; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
1976; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
1977; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, s10, v8
1978; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
1979; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
1980; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
1981; GISEL-NEXT:    v_subrev_i32_e32 v16, vcc, s10, v9
1982; GISEL-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
1983; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1984; GISEL-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
1985; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1986; GISEL-NEXT:    v_cndmask_b32_e32 v11, v15, v11, vcc
1987; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
1988; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc
1989; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
1990; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
1991; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
1992; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
1993; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
1994; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
1995; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
1996; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v9, s[4:5]
1997; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
1998; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
1999; GISEL-NEXT:    s_setpc_b64 s[30:31]
2000;
2001; CGP-LABEL: v_urem_v2i64_oddk_denom:
2002; CGP:       ; %bb.0:
2003; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2004; CGP-NEXT:    s_mov_b32 s10, 0x12d8fb
2005; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0
2006; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s10
2007; CGP-NEXT:    s_mov_b32 s8, 0xffed2705
2008; CGP-NEXT:    v_mov_b32_e32 v6, v5
2009; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
2010; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v4
2011; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v5
2012; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
2013; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
2014; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
2015; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
2016; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
2017; CGP-NEXT:    v_trunc_f32_e32 v6, v6
2018; CGP-NEXT:    v_trunc_f32_e32 v7, v7
2019; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
2020; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
2021; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
2022; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
2023; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
2024; CGP-NEXT:    v_mul_lo_u32 v8, s8, v6
2025; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
2026; CGP-NEXT:    v_mul_lo_u32 v9, s8, v7
2027; CGP-NEXT:    v_mul_lo_u32 v10, s8, v4
2028; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
2029; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
2030; CGP-NEXT:    v_mul_lo_u32 v13, s8, v5
2031; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
2032; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
2033; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
2034; CGP-NEXT:    v_mul_lo_u32 v11, v6, v10
2035; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
2036; CGP-NEXT:    v_mul_hi_u32 v10, v6, v10
2037; CGP-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
2038; CGP-NEXT:    v_mul_lo_u32 v14, v7, v13
2039; CGP-NEXT:    v_mul_hi_u32 v17, v5, v13
2040; CGP-NEXT:    v_mul_hi_u32 v13, v7, v13
2041; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
2042; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
2043; CGP-NEXT:    v_mul_lo_u32 v12, v4, v8
2044; CGP-NEXT:    v_mul_lo_u32 v15, v6, v8
2045; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
2046; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
2047; CGP-NEXT:    v_mul_lo_u32 v19, v5, v9
2048; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
2049; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
2050; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
2051; CGP-NEXT:    v_mul_lo_u32 v14, v7, v9
2052; CGP-NEXT:    v_mul_hi_u32 v17, v5, v9
2053; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
2054; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
2055; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2056; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
2057; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2058; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
2059; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2060; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
2061; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2062; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
2063; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
2064; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
2065; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
2066; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
2067; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
2068; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
2069; CGP-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
2070; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
2071; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
2072; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2073; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
2074; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2075; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
2076; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
2077; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
2078; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
2079; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
2080; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
2081; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
2082; CGP-NEXT:    v_mul_lo_u32 v8, s8, v4
2083; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
2084; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
2085; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
2086; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
2087; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
2088; CGP-NEXT:    v_mul_lo_u32 v9, s8, v5
2089; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
2090; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
2091; CGP-NEXT:    v_mul_lo_u32 v16, s8, v10
2092; CGP-NEXT:    v_mul_lo_u32 v17, v10, v8
2093; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
2094; CGP-NEXT:    v_mul_hi_u32 v8, v10, v8
2095; CGP-NEXT:    v_mul_lo_u32 v19, s8, v13
2096; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
2097; CGP-NEXT:    v_mul_lo_u32 v16, v13, v9
2098; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
2099; CGP-NEXT:    v_mul_hi_u32 v19, v5, v9
2100; CGP-NEXT:    v_mul_hi_u32 v9, v13, v9
2101; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
2102; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
2103; CGP-NEXT:    v_mul_lo_u32 v14, v4, v11
2104; CGP-NEXT:    v_mul_lo_u32 v15, v5, v12
2105; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
2106; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
2107; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
2108; CGP-NEXT:    v_mul_lo_u32 v15, v10, v11
2109; CGP-NEXT:    v_mul_hi_u32 v19, v4, v11
2110; CGP-NEXT:    v_mul_hi_u32 v10, v10, v11
2111; CGP-NEXT:    v_mul_lo_u32 v11, v13, v12
2112; CGP-NEXT:    v_mul_hi_u32 v13, v13, v12
2113; CGP-NEXT:    v_mul_hi_u32 v12, v5, v12
2114; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
2115; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
2116; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
2117; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
2118; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
2119; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
2120; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
2121; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
2122; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
2123; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
2124; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
2125; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
2126; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
2127; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
2128; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
2129; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
2130; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
2131; CGP-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
2132; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
2133; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
2134; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
2135; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
2136; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
2137; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
2138; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
2139; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
2140; CGP-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
2141; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
2142; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
2143; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
2144; CGP-NEXT:    v_mul_hi_u32 v10, v2, v4
2145; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
2146; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
2147; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
2148; CGP-NEXT:    v_mul_lo_u32 v9, v1, v5
2149; CGP-NEXT:    v_mul_hi_u32 v11, v0, v5
2150; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
2151; CGP-NEXT:    v_mul_lo_u32 v12, v2, v6
2152; CGP-NEXT:    v_mul_lo_u32 v13, v3, v6
2153; CGP-NEXT:    v_mul_hi_u32 v14, v2, v6
2154; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
2155; CGP-NEXT:    v_mul_lo_u32 v15, v0, v7
2156; CGP-NEXT:    v_mul_lo_u32 v16, v1, v7
2157; CGP-NEXT:    v_mul_hi_u32 v17, v0, v7
2158; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
2159; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
2160; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2161; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
2162; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2163; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
2164; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2165; CGP-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
2166; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
2167; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2168; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2169; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
2170; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2171; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
2172; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2173; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
2174; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2175; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
2176; CGP-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
2177; CGP-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
2178; CGP-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
2179; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
2180; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2181; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
2182; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2183; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
2184; CGP-NEXT:    v_mul_lo_u32 v10, s10, v4
2185; CGP-NEXT:    v_mul_lo_u32 v12, 0, v4
2186; CGP-NEXT:    v_mul_hi_u32 v4, s10, v4
2187; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
2188; CGP-NEXT:    v_mul_lo_u32 v11, s10, v5
2189; CGP-NEXT:    v_mul_lo_u32 v13, 0, v5
2190; CGP-NEXT:    v_mul_hi_u32 v5, s10, v5
2191; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
2192; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
2193; CGP-NEXT:    v_mul_lo_u32 v6, s10, v6
2194; CGP-NEXT:    v_mul_lo_u32 v7, s10, v7
2195; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
2196; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
2197; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
2198; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
2199; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
2200; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
2201; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
2202; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
2203; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
2204; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
2205; CGP-NEXT:    v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
2206; CGP-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v5
2207; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v0
2208; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
2209; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
2210; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
2211; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2212; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v7
2213; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
2214; CGP-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
2215; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
2216; CGP-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
2217; CGP-NEXT:    v_subrev_i32_e32 v8, vcc, s10, v2
2218; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2219; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v8
2220; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
2221; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
2222; CGP-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
2223; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s10, v0
2224; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
2225; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v9
2226; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
2227; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
2228; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
2229; CGP-NEXT:    v_subrev_i32_e32 v13, vcc, s10, v8
2230; CGP-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
2231; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
2232; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
2233; CGP-NEXT:    v_subrev_i32_e32 v16, vcc, s10, v9
2234; CGP-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
2235; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
2236; CGP-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
2237; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
2238; CGP-NEXT:    v_cndmask_b32_e32 v11, v15, v11, vcc
2239; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
2240; CGP-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc
2241; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
2242; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
2243; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
2244; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
2245; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
2246; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
2247; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
2248; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v9, s[4:5]
2249; CGP-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
2250; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
2251; CGP-NEXT:    s_setpc_b64 s[30:31]
2252  %result = urem <2 x i64> %num, <i64 1235195, i64 1235195>
2253  ret <2 x i64> %result
2254}
2255
; Scalar i64 unsigned remainder of %x by the constant 4096 shifted left by the
; run-time value %y (the divisor is therefore not a compile-time constant).
;
; Every assertion in this function uses the shared CHECK prefix, which both RUN
; lines at the top of the file pass to FileCheck, so the GISEL and CGP
; configurations are expected to emit the same code here.
;
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
;
; NOTE(review): the expected code appears to contain two expansions selected
; through exec masking: %bb.1 works on both register halves, while %bb.3 only
; touches v0/v4 and writes 0 to v3. The selector tests v1|v5 against zero;
; v5 is the high half of the shifted divisor, v1 is presumably the high half
; of %x -- confirm against the calling convention.
2256define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
2257; CHECK-LABEL: v_urem_i64_pow2_shl_denom:
2258; CHECK:       ; %bb.0:
2259; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2260; CHECK-NEXT:    s_movk_i32 s4, 0x1000
2261; CHECK-NEXT:    s_mov_b32 s5, 0
2262; CHECK-NEXT:    v_mov_b32_e32 v6, 0
2263; CHECK-NEXT:    v_lshl_b64 v[4:5], s[4:5], v2
2264; CHECK-NEXT:    v_or_b32_e32 v7, v1, v5
2265; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
2266; CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
2267; CHECK-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
2268; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
2269; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
2270; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
2271; CHECK-NEXT:    s_cbranch_execz BB7_2
2272; CHECK-NEXT:  ; %bb.1:
2273; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v4
2274; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v5
2275; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
2276; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v5, vcc
2277; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
2278; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
2279; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
2280; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
2281; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
2282; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
2283; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
2284; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
2285; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v3
2286; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v2
2287; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v2
2288; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v2
2289; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
2290; CHECK-NEXT:    v_mul_lo_u32 v10, v3, v9
2291; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v9
2292; CHECK-NEXT:    v_mul_hi_u32 v9, v3, v9
2293; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
2294; CHECK-NEXT:    v_mul_lo_u32 v11, v2, v8
2295; CHECK-NEXT:    v_mul_lo_u32 v13, v3, v8
2296; CHECK-NEXT:    v_mul_hi_u32 v14, v2, v8
2297; CHECK-NEXT:    v_mul_hi_u32 v8, v3, v8
2298; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
2299; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2300; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
2301; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2302; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2303; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2304; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
2305; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2306; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2307; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
2308; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
2309; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2310; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2311; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2312; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
2313; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v3, v8, vcc
2314; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v8
2315; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v2
2316; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v2
2317; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v2
2318; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v9
2319; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
2320; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v8
2321; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
2322; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
2323; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
2324; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v6
2325; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v6
2326; CHECK-NEXT:    v_mul_hi_u32 v13, v2, v6
2327; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v6
2328; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
2329; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
2330; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
2331; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
2332; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
2333; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
2334; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
2335; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2336; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
2337; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
2338; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
2339; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
2340; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
2341; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
2342; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
2343; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
2344; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2345; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v2
2346; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v2
2347; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
2348; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v3
2349; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v3
2350; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v3
2351; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
2352; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
2353; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2354; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
2355; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2356; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
2357; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2358; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
2359; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
2360; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
2361; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
2362; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
2363; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2364; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
2365; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v2
2366; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v2
2367; CHECK-NEXT:    v_mul_hi_u32 v2, v4, v2
2368; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
2369; CHECK-NEXT:    v_mul_lo_u32 v3, v4, v3
2370; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
2371; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
2372; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v7
2373; CHECK-NEXT:    v_subb_u32_e64 v6, s[4:5], v1, v2, vcc
2374; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
2375; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v4
2376; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
2377; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v5
2378; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
2379; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
2380; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v5
2381; CHECK-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
2382; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v3, v4
2383; CHECK-NEXT:    v_subbrev_u32_e64 v8, s[4:5], 0, v1, vcc
2384; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v4
2385; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
2386; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
2387; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v5
2388; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
2389; CHECK-NEXT:    v_sub_i32_e32 v11, vcc, v7, v4
2390; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
2391; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v5
2392; CHECK-NEXT:    v_cndmask_b32_e32 v5, v10, v9, vcc
2393; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
2394; CHECK-NEXT:    v_cndmask_b32_e32 v5, v7, v11, vcc
2395; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
2396; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
2397; CHECK-NEXT:    v_cndmask_b32_e32 v2, v3, v5, vcc
2398; CHECK-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
2399; CHECK-NEXT:  BB7_2: ; %Flow
2400; CHECK-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
2401; CHECK-NEXT:    s_xor_b64 exec, exec, s[4:5]
2402; CHECK-NEXT:    s_cbranch_execz BB7_4
2403; CHECK-NEXT:  ; %bb.3:
2404; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v4
2405; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, 0, v4
2406; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
2407; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
2408; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
2409; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v1
2410; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
2411; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
2412; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
2413; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v4
2414; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
2415; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
2416; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
2417; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2418; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
2419; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
2420; CHECK-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
2421; CHECK-NEXT:    v_mov_b32_e32 v3, 0
2422; CHECK-NEXT:  BB7_4:
2423; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
2424; CHECK-NEXT:    v_mov_b32_e32 v0, v2
2425; CHECK-NEXT:    v_mov_b32_e32 v1, v3
2426; CHECK-NEXT:    s_setpc_b64 s[30:31]
  ; Divisor: the constant 4096 shifted left by the variable amount %y.
2427  %shl.y = shl i64 4096, %y
  ; Unsigned remainder of %x by that shifted divisor; this is the returned value.
2428  %r = urem i64 %x, %shl.y
2429  ret i64 %r
2430}
2431
2432define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
2433; GISEL-LABEL: v_urem_v2i64_pow2_shl_denom:
2434; GISEL:       ; %bb.0:
2435; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2436; GISEL-NEXT:    s_movk_i32 s4, 0x1000
2437; GISEL-NEXT:    s_mov_b32 s5, 0
2438; GISEL-NEXT:    v_lshl_b64 v[4:5], s[4:5], v4
2439; GISEL-NEXT:    v_lshl_b64 v[6:7], s[4:5], v6
2440; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
2441; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
2442; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
2443; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
2444; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
2445; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
2446; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
2447; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
2448; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
2449; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
2450; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
2451; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
2452; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
2453; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
2454; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
2455; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
2456; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2457; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
2458; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
2459; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v13
2460; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v12
2461; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
2462; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2463; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
2464; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2465; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
2466; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
2467; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
2468; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
2469; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
2470; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2471; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
2472; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
2473; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
2474; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
2475; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2476; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
2477; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
2478; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2479; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
2480; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v9, v13, vcc
2481; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
2482; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v8
2483; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
2484; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v12
2485; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v8
2486; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
2487; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
2488; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v13
2489; GISEL-NEXT:    v_mul_lo_u32 v14, v8, v10
2490; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v13
2491; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
2492; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2493; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
2494; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2495; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
2496; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v10
2497; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v13
2498; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v10
2499; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
2500; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2501; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
2502; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2503; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
2504; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
2505; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2506; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
2507; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v10
2508; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
2509; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
2510; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
2511; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, 0, v9, s[4:5]
2512; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v8
2513; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
2514; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v8
2515; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
2516; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2517; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2518; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2519; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2520; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
2521; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
2522; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
2523; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
2524; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2525; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
2526; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2527; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2528; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2529; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2530; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2531; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
2532; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
2533; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v8
2534; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
2535; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v9
2536; GISEL-NEXT:    v_mul_hi_u32 v8, v4, v8
2537; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
2538; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
2539; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
2540; GISEL-NEXT:    v_subb_u32_e64 v9, s[4:5], v1, v8, vcc
2541; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v8
2542; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v5
2543; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
2544; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
2545; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
2546; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v5
2547; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
2548; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], v0, v4
2549; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
2550; GISEL-NEXT:    v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5]
2551; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v5
2552; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
2553; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v4
2554; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
2555; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v5
2556; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
2557; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v10, v4
2558; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5]
2559; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
2560; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
2561; GISEL-NEXT:    v_cndmask_b32_e32 v4, v10, v4, vcc
2562; GISEL-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
2563; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
2564; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2565; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
2566; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v6
2567; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v7
2568; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
2569; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
2570; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
2571; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
2572; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
2573; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
2574; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
2575; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
2576; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
2577; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
2578; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
2579; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
2580; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v5
2581; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v4
2582; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2583; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
2584; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v10
2585; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v11
2586; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v10
2587; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
2588; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2589; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
2590; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2591; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2592; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v11
2593; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
2594; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
2595; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
2596; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2597; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
2598; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2599; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2600; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2601; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2602; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2603; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
2604; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2605; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
2606; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v11, vcc
2607; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v11
2608; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v4
2609; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v4
2610; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v10
2611; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
2612; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
2613; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
2614; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v11
2615; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
2616; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v11
2617; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
2618; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2619; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
2620; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
2621; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
2622; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
2623; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v11
2624; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
2625; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
2626; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2627; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
2628; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2629; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
2630; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
2631; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2632; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
2633; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
2634; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
2635; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v9
2636; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
2637; GISEL-NEXT:    v_addc_u32_e64 v5, vcc, 0, v5, s[4:5]
2638; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
2639; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
2640; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
2641; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
2642; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2643; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2644; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2645; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
2646; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
2647; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
2648; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v5
2649; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
2650; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2651; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
2652; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2653; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
2654; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
2655; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2656; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
2657; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
2658; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
2659; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
2660; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v4
2661; GISEL-NEXT:    v_mul_lo_u32 v5, v6, v5
2662; GISEL-NEXT:    v_mul_hi_u32 v4, v6, v4
2663; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
2664; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
2665; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
2666; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v3, v4, vcc
2667; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
2668; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v7
2669; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
2670; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
2671; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
2672; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v7
2673; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
2674; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v2, v6
2675; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
2676; GISEL-NEXT:    v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5]
2677; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v7
2678; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
2679; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v6
2680; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
2681; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v7
2682; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
2683; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v8, v6
2684; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5]
2685; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2686; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
2687; GISEL-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
2688; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
2689; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
2690; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
2691; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
2692; GISEL-NEXT:    s_setpc_b64 s[30:31]
2693;
2694; CGP-LABEL: v_urem_v2i64_pow2_shl_denom:
2695; CGP:       ; %bb.0:
2696; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2697; CGP-NEXT:    v_mov_b32_e32 v5, v0
2698; CGP-NEXT:    v_mov_b32_e32 v7, v1
2699; CGP-NEXT:    s_movk_i32 s4, 0x1000
2700; CGP-NEXT:    s_mov_b32 s5, 0
2701; CGP-NEXT:    v_mov_b32_e32 v0, 0
2702; CGP-NEXT:    v_lshl_b64 v[10:11], s[4:5], v4
2703; CGP-NEXT:    v_lshl_b64 v[8:9], s[4:5], v6
2704; CGP-NEXT:    v_or_b32_e32 v1, v7, v11
2705; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
2706; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
2707; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
2708; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
2709; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
2710; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
2711; CGP-NEXT:    s_cbranch_execz BB8_2
2712; CGP-NEXT:  ; %bb.1:
2713; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
2714; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v11
2715; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v10
2716; CGP-NEXT:    v_subb_u32_e32 v6, vcc, 0, v11, vcc
2717; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
2718; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2719; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
2720; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
2721; CGP-NEXT:    v_trunc_f32_e32 v1, v1
2722; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
2723; CGP-NEXT:    v_cvt_u32_f32_e32 v1, v1
2724; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
2725; CGP-NEXT:    v_mul_lo_u32 v12, v4, v1
2726; CGP-NEXT:    v_mul_lo_u32 v13, v4, v0
2727; CGP-NEXT:    v_mul_lo_u32 v14, v6, v0
2728; CGP-NEXT:    v_mul_hi_u32 v15, v4, v0
2729; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
2730; CGP-NEXT:    v_mul_lo_u32 v14, v1, v13
2731; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
2732; CGP-NEXT:    v_mul_hi_u32 v13, v1, v13
2733; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
2734; CGP-NEXT:    v_mul_lo_u32 v15, v0, v12
2735; CGP-NEXT:    v_mul_lo_u32 v17, v1, v12
2736; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
2737; CGP-NEXT:    v_mul_hi_u32 v12, v1, v12
2738; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
2739; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2740; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
2741; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
2742; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
2743; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2744; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
2745; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
2746; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
2747; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
2748; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2749; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2750; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
2751; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
2752; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
2753; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], v1, v12, vcc
2754; CGP-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
2755; CGP-NEXT:    v_mul_lo_u32 v12, v4, v0
2756; CGP-NEXT:    v_mul_lo_u32 v6, v6, v0
2757; CGP-NEXT:    v_mul_hi_u32 v14, v4, v0
2758; CGP-NEXT:    v_mul_lo_u32 v4, v4, v13
2759; CGP-NEXT:    v_mul_lo_u32 v15, v13, v12
2760; CGP-NEXT:    v_mul_hi_u32 v16, v0, v12
2761; CGP-NEXT:    v_mul_hi_u32 v12, v13, v12
2762; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v6, v4
2763; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v14
2764; CGP-NEXT:    v_mul_lo_u32 v6, v0, v4
2765; CGP-NEXT:    v_mul_lo_u32 v14, v13, v4
2766; CGP-NEXT:    v_mul_hi_u32 v17, v0, v4
2767; CGP-NEXT:    v_mul_hi_u32 v4, v13, v4
2768; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v15, v6
2769; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2770; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
2771; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2772; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v16
2773; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
2774; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
2775; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2776; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v13, v6
2777; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v15
2778; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v12, v6
2779; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2780; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
2781; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v12
2782; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
2783; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
2784; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2785; CGP-NEXT:    v_mul_lo_u32 v4, v7, v0
2786; CGP-NEXT:    v_mul_hi_u32 v6, v5, v0
2787; CGP-NEXT:    v_mul_hi_u32 v0, v7, v0
2788; CGP-NEXT:    v_mul_lo_u32 v12, v5, v1
2789; CGP-NEXT:    v_mul_lo_u32 v13, v7, v1
2790; CGP-NEXT:    v_mul_hi_u32 v14, v5, v1
2791; CGP-NEXT:    v_mul_hi_u32 v1, v7, v1
2792; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
2793; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2794; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
2795; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2796; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
2797; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2798; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
2799; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2800; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
2801; CGP-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
2802; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
2803; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2804; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
2805; CGP-NEXT:    v_mul_lo_u32 v6, v10, v0
2806; CGP-NEXT:    v_mul_lo_u32 v12, v11, v0
2807; CGP-NEXT:    v_mul_hi_u32 v0, v10, v0
2808; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
2809; CGP-NEXT:    v_mul_lo_u32 v1, v10, v1
2810; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
2811; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
2812; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v5, v6
2813; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v7, v0, vcc
2814; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v7, v0
2815; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v10
2816; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
2817; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v11
2818; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
2819; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v11, vcc
2820; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v11
2821; CGP-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
2822; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v10
2823; CGP-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v0, vcc
2824; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v10
2825; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
2826; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v11, vcc
2827; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v12, v11
2828; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
2829; CGP-NEXT:    v_sub_i32_e32 v15, vcc, v7, v10
2830; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
2831; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v12, v11
2832; CGP-NEXT:    v_cndmask_b32_e32 v11, v14, v13, vcc
2833; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
2834; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
2835; CGP-NEXT:    v_cndmask_b32_e32 v11, v12, v0, vcc
2836; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
2837; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v7, vcc
2838; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v11, vcc
2839; CGP-NEXT:  BB8_2: ; %Flow2
2840; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
2841; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
2842; CGP-NEXT:    s_cbranch_execz BB8_4
2843; CGP-NEXT:  ; %bb.3:
2844; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
2845; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v10
2846; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2847; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2848; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
2849; CGP-NEXT:    v_mul_lo_u32 v1, v1, v0
2850; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
2851; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
2852; CGP-NEXT:    v_mul_hi_u32 v0, v5, v0
2853; CGP-NEXT:    v_mul_lo_u32 v0, v0, v10
2854; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v5, v0
2855; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v10
2856; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v10
2857; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2858; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v10
2859; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v10
2860; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2861; CGP-NEXT:    v_mov_b32_e32 v1, 0
2862; CGP-NEXT:  BB8_4:
2863; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
2864; CGP-NEXT:    v_or_b32_e32 v5, v3, v9
2865; CGP-NEXT:    v_mov_b32_e32 v4, 0
2866; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
2867; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
2868; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
2869; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
2870; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
2871; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
2872; CGP-NEXT:    s_cbranch_execz BB8_6
2873; CGP-NEXT:  ; %bb.5:
2874; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v8
2875; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v9
2876; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v8
2877; CGP-NEXT:    v_subb_u32_e32 v7, vcc, 0, v9, vcc
2878; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
2879; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
2880; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
2881; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
2882; CGP-NEXT:    v_trunc_f32_e32 v5, v5
2883; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
2884; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
2885; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
2886; CGP-NEXT:    v_mul_lo_u32 v10, v6, v5
2887; CGP-NEXT:    v_mul_lo_u32 v11, v6, v4
2888; CGP-NEXT:    v_mul_lo_u32 v12, v7, v4
2889; CGP-NEXT:    v_mul_hi_u32 v13, v6, v4
2890; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
2891; CGP-NEXT:    v_mul_lo_u32 v12, v5, v11
2892; CGP-NEXT:    v_mul_hi_u32 v14, v4, v11
2893; CGP-NEXT:    v_mul_hi_u32 v11, v5, v11
2894; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
2895; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
2896; CGP-NEXT:    v_mul_lo_u32 v15, v5, v10
2897; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
2898; CGP-NEXT:    v_mul_hi_u32 v10, v5, v10
2899; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
2900; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2901; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
2902; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2903; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
2904; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2905; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
2906; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2907; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2908; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
2909; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2910; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2911; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2912; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2913; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
2914; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v5, v10, vcc
2915; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v10
2916; CGP-NEXT:    v_mul_lo_u32 v10, v6, v4
2917; CGP-NEXT:    v_mul_lo_u32 v7, v7, v4
2918; CGP-NEXT:    v_mul_hi_u32 v12, v6, v4
2919; CGP-NEXT:    v_mul_lo_u32 v6, v6, v11
2920; CGP-NEXT:    v_mul_lo_u32 v13, v11, v10
2921; CGP-NEXT:    v_mul_hi_u32 v14, v4, v10
2922; CGP-NEXT:    v_mul_hi_u32 v10, v11, v10
2923; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
2924; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v12
2925; CGP-NEXT:    v_mul_lo_u32 v7, v4, v6
2926; CGP-NEXT:    v_mul_lo_u32 v12, v11, v6
2927; CGP-NEXT:    v_mul_hi_u32 v15, v4, v6
2928; CGP-NEXT:    v_mul_hi_u32 v6, v11, v6
2929; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v13, v7
2930; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2931; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v10
2932; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2933; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v14
2934; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
2935; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v15
2936; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2937; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
2938; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v13
2939; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
2940; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
2941; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
2942; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
2943; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
2944; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
2945; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2946; CGP-NEXT:    v_mul_lo_u32 v6, v3, v4
2947; CGP-NEXT:    v_mul_hi_u32 v7, v2, v4
2948; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
2949; CGP-NEXT:    v_mul_lo_u32 v10, v2, v5
2950; CGP-NEXT:    v_mul_lo_u32 v11, v3, v5
2951; CGP-NEXT:    v_mul_hi_u32 v12, v2, v5
2952; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
2953; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
2954; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2955; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
2956; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2957; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
2958; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2959; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
2960; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
2961; CGP-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
2962; CGP-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
2963; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
2964; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2965; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
2966; CGP-NEXT:    v_mul_lo_u32 v7, v8, v4
2967; CGP-NEXT:    v_mul_lo_u32 v10, v9, v4
2968; CGP-NEXT:    v_mul_hi_u32 v4, v8, v4
2969; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
2970; CGP-NEXT:    v_mul_lo_u32 v5, v8, v5
2971; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
2972; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
2973; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v2, v7
2974; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
2975; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
2976; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v8
2977; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
2978; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v9
2979; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
2980; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
2981; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v9
2982; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
2983; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v5, v8
2984; CGP-NEXT:    v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc
2985; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v8
2986; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
2987; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
2988; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v9
2989; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
2990; CGP-NEXT:    v_sub_i32_e32 v13, vcc, v7, v8
2991; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2992; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
2993; CGP-NEXT:    v_cndmask_b32_e32 v9, v12, v11, vcc
2994; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
2995; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v13, vcc
2996; CGP-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
2997; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
2998; CGP-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
2999; CGP-NEXT:    v_cndmask_b32_e32 v5, v6, v3, vcc
3000; CGP-NEXT:  BB8_6: ; %Flow
3001; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
3002; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
3003; CGP-NEXT:    s_cbranch_execz BB8_8
3004; CGP-NEXT:  ; %bb.7:
3005; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v8
3006; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v8
3007; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
3008; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
3009; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
3010; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
3011; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
3012; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
3013; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
3014; CGP-NEXT:    v_mul_lo_u32 v3, v3, v8
3015; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
3016; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v8
3017; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v8
3018; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
3019; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v8
3020; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v8
3021; CGP-NEXT:    v_cndmask_b32_e32 v4, v2, v3, vcc
3022; CGP-NEXT:    v_mov_b32_e32 v5, 0
3023; CGP-NEXT:  BB8_8:
3024; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
3025; CGP-NEXT:    v_mov_b32_e32 v2, v4
3026; CGP-NEXT:    v_mov_b32_e32 v3, v5
3027; CGP-NEXT:    s_setpc_b64 s[30:31]
3028  %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
3029  %r = urem <2 x i64> %x, %shl.y
3030  ret <2 x i64> %r
3031}
3032
; v_urem_i64_24bit: unsigned 64-bit remainder where both operands are first
; ANDed with 16777215 (0xffffff), so only their low 24 bits can be non-zero.
;
; Expected output per run configuration (see the RUN lines at the top of the
; file for which prefix maps to which -amdgpu-codegenprepare-disable-idiv-expansion
; setting):
;   * GISEL prefix (expansion in AMDGPUCodeGenPrepare disabled): an integer
;     sequence on the masked low dwords - v_rcp_iflag_f32 reciprocal estimate
;     scaled by 0x4f7ffffe, one mul_lo/mul_hi refinement, then two
;     subtract/compare/select correction steps on the remainder.
;   * CGP prefix (expansion in AMDGPUCodeGenPrepare enabled): a floating-point
;     sequence - v_rcp_f32, v_trunc_f32, v_mad_f32 with a |x| >= den fix-up of
;     the quotient, followed by re-masking the remainder with 0xffffff.
; Both variants finish by writing 0 to v1 before returning.
;
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
3033define i64 @v_urem_i64_24bit(i64 %num, i64 %den) {
3034; GISEL-LABEL: v_urem_i64_24bit:
3035; GISEL:       ; %bb.0:
3036; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3037; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
3038; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
3039; GISEL-NEXT:    v_and_b32_e32 v1, s4, v2
3040; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
3041; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
3042; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
3043; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
3044; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
3045; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
3046; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
3047; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
3048; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
3049; GISEL-NEXT:    v_mul_lo_u32 v2, v2, v1
3050; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
3051; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
3052; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
3053; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3054; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
3055; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
3056; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3057; GISEL-NEXT:    v_mov_b32_e32 v1, 0
3058; GISEL-NEXT:    s_setpc_b64 s[30:31]
3059;
3060; CGP-LABEL: v_urem_i64_24bit:
3061; CGP:       ; %bb.0:
3062; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3063; CGP-NEXT:    s_mov_b32 s4, 0xffffff
3064; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
3065; CGP-NEXT:    v_and_b32_e32 v1, s4, v2
3066; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v0
3067; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v1
3068; CGP-NEXT:    v_rcp_f32_e32 v4, v3
3069; CGP-NEXT:    v_mul_f32_e32 v4, v2, v4
3070; CGP-NEXT:    v_trunc_f32_e32 v4, v4
3071; CGP-NEXT:    v_mad_f32 v2, -v4, v3, v2
3072; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
3073; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, v3
3074; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
3075; CGP-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
3076; CGP-NEXT:    v_mul_lo_u32 v1, v2, v1
3077; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
3078; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
3079; CGP-NEXT:    v_mov_b32_e32 v1, 0
3080; CGP-NEXT:    s_setpc_b64 s[30:31]
; 16777215 == 2^24 - 1: clear every bit above bit 23 of both operands so the
; urem below only ever sees values that fit in 24 bits.
3081  %num.mask = and i64 %num, 16777215
3082  %den.mask = and i64 %den, 16777215
3083  %result = urem i64 %num.mask, %den.mask
3084  ret i64 %result
3085}
3086
3087define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
3088; GISEL-LABEL: v_urem_v2i64_24bit:
3089; GISEL:       ; %bb.0:
3090; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3091; GISEL-NEXT:    s_mov_b32 s6, 0xffffff
3092; GISEL-NEXT:    v_cvt_f32_u32_e32 v1, 0
3093; GISEL-NEXT:    v_and_b32_e32 v3, s6, v4
3094; GISEL-NEXT:    v_and_b32_e32 v4, s6, v6
3095; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
3096; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v3
3097; GISEL-NEXT:    v_subb_u32_e64 v7, s[4:5], 0, 0, vcc
3098; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
3099; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v4
3100; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
3101; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v1
3102; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v1
3103; GISEL-NEXT:    v_rcp_iflag_f32_e32 v1, v5
3104; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v8
3105; GISEL-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
3106; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
3107; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v1
3108; GISEL-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v5
3109; GISEL-NEXT:    v_trunc_f32_e32 v8, v8
3110; GISEL-NEXT:    v_trunc_f32_e32 v11, v11
3111; GISEL-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v8
3112; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
3113; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v11
3114; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
3115; GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
3116; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v8
3117; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
3118; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v11
3119; GISEL-NEXT:    v_mul_lo_u32 v14, v6, v1
3120; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v1
3121; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v1
3122; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v5
3123; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v5
3124; GISEL-NEXT:    v_mul_hi_u32 v19, v9, v5
3125; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
3126; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v13
3127; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v17
3128; GISEL-NEXT:    v_mul_hi_u32 v18, v5, v17
3129; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v19
3130; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v13
3131; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v19
3132; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
3133; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
3134; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v14
3135; GISEL-NEXT:    v_mul_hi_u32 v18, v1, v14
3136; GISEL-NEXT:    v_mul_hi_u32 v14, v8, v14
3137; GISEL-NEXT:    v_mul_hi_u32 v17, v11, v17
3138; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v16
3139; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v12
3140; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v16
3141; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
3142; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
3143; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v12
3144; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
3145; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v18
3146; GISEL-NEXT:    v_mul_hi_u32 v18, v1, v12
3147; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
3148; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
3149; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v18
3150; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
3151; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
3152; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
3153; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
3154; GISEL-NEXT:    v_mul_lo_u32 v19, v11, v13
3155; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
3156; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v13
3157; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
3158; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
3159; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
3160; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v20, v19
3161; GISEL-NEXT:    v_and_b32_e32 v0, s6, v0
3162; GISEL-NEXT:    v_and_b32_e32 v2, s6, v2
3163; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v12
3164; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
3165; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
3166; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
3167; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
3168; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
3169; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
3170; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v18
3171; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
3172; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
3173; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
3174; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v8, v12, vcc
3175; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
3176; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v1
3177; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v1
3178; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v1
3179; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v17
3180; GISEL-NEXT:    v_addc_u32_e64 v16, s[6:7], v11, v13, s[4:5]
3181; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v13
3182; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v5
3183; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v5
3184; GISEL-NEXT:    v_mul_hi_u32 v17, v9, v5
3185; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v14
3186; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v12
3187; GISEL-NEXT:    v_mul_hi_u32 v19, v1, v12
3188; GISEL-NEXT:    v_mul_hi_u32 v12, v14, v12
3189; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v16
3190; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v7, v6
3191; GISEL-NEXT:    v_mul_lo_u32 v7, v16, v13
3192; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v10, v9
3193; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v13
3194; GISEL-NEXT:    v_mul_hi_u32 v13, v16, v13
3195; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v15
3196; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v17
3197; GISEL-NEXT:    v_mul_lo_u32 v15, v1, v6
3198; GISEL-NEXT:    v_mul_lo_u32 v17, v5, v9
3199; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v17
3200; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[6:7]
3201; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v10
3202; GISEL-NEXT:    v_mul_lo_u32 v7, v14, v6
3203; GISEL-NEXT:    v_mul_hi_u32 v10, v1, v6
3204; GISEL-NEXT:    v_mul_hi_u32 v6, v14, v6
3205; GISEL-NEXT:    v_mul_lo_u32 v14, v16, v9
3206; GISEL-NEXT:    v_mul_hi_u32 v16, v16, v9
3207; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
3208; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v15
3209; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
3210; GISEL-NEXT:    v_add_i32_e64 v7, s[8:9], v7, v12
3211; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
3212; GISEL-NEXT:    v_add_i32_e64 v13, s[8:9], v14, v13
3213; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
3214; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v15, v19
3215; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
3216; GISEL-NEXT:    v_add_i32_e64 v7, s[8:9], v7, v10
3217; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[8:9]
3218; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
3219; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v13, v9
3220; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[6:7]
3221; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v18, v15
3222; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v12, v10
3223; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v17, v19
3224; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v14, v13
3225; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v15
3226; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
3227; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
3228; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
3229; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v14
3230; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v12
3231; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v10
3232; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v16, v12
3233; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v8, v6, vcc
3234; GISEL-NEXT:    v_addc_u32_e64 v8, vcc, v11, v10, s[4:5]
3235; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v7
3236; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
3237; GISEL-NEXT:    v_mul_lo_u32 v7, 0, v1
3238; GISEL-NEXT:    v_mul_hi_u32 v10, v0, v1
3239; GISEL-NEXT:    v_mul_hi_u32 v1, 0, v1
3240; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
3241; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
3242; GISEL-NEXT:    v_mul_lo_u32 v9, 0, v5
3243; GISEL-NEXT:    v_mul_hi_u32 v11, v2, v5
3244; GISEL-NEXT:    v_mul_hi_u32 v5, 0, v5
3245; GISEL-NEXT:    v_mul_lo_u32 v12, v0, v6
3246; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v6
3247; GISEL-NEXT:    v_mul_hi_u32 v14, v0, v6
3248; GISEL-NEXT:    v_mul_hi_u32 v6, 0, v6
3249; GISEL-NEXT:    v_mul_lo_u32 v15, v2, v8
3250; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v8
3251; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v8
3252; GISEL-NEXT:    v_mul_hi_u32 v8, 0, v8
3253; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
3254; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
3255; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
3256; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
3257; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
3258; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
3259; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
3260; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
3261; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
3262; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
3263; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
3264; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
3265; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
3266; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
3267; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
3268; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
3269; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
3270; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
3271; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
3272; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
3273; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v7
3274; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
3275; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
3276; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
3277; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
3278; GISEL-NEXT:    v_mul_lo_u32 v10, v3, v1
3279; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v1
3280; GISEL-NEXT:    v_mul_hi_u32 v1, v3, v1
3281; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
3282; GISEL-NEXT:    v_mul_lo_u32 v11, v4, v5
3283; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v5
3284; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
3285; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
3286; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
3287; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v6
3288; GISEL-NEXT:    v_mul_lo_u32 v7, v4, v7
3289; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
3290; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
3291; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v6, v1
3292; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
3293; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
3294; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], 0, v1, vcc
3295; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], 0, v1
3296; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
3297; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
3298; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v11
3299; GISEL-NEXT:    v_subb_u32_e64 v8, s[6:7], 0, v5, s[4:5]
3300; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], 0, v5
3301; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v2, v4
3302; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
3303; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
3304; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[6:7]
3305; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
3306; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v8
3307; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
3308; GISEL-NEXT:    v_subbrev_u32_e64 v5, vcc, 0, v5, s[4:5]
3309; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
3310; GISEL-NEXT:    v_cndmask_b32_e32 v7, v10, v7, vcc
3311; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v0, v3
3312; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
3313; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v3
3314; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
3315; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
3316; GISEL-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
3317; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, v2, v4
3318; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
3319; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v4
3320; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
3321; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
3322; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
3323; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v10, v3
3324; GISEL-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v1, vcc
3325; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v5
3326; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, vcc
3327; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v11, v4
3328; GISEL-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v5, vcc
3329; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
3330; GISEL-NEXT:    v_cndmask_b32_e32 v12, v14, v12, vcc
3331; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
3332; GISEL-NEXT:    v_cndmask_b32_e32 v13, v16, v13, vcc
3333; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
3334; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
3335; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v13
3336; GISEL-NEXT:    v_cndmask_b32_e64 v4, v11, v4, s[4:5]
3337; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
3338; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
3339; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3340; GISEL-NEXT:    v_cndmask_b32_e64 v3, v5, v17, s[4:5]
3341; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
3342; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
3343; GISEL-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
3344; GISEL-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
3345; GISEL-NEXT:    s_setpc_b64 s[30:31]
3346;
3347; CGP-LABEL: v_urem_v2i64_24bit:
3348; CGP:       ; %bb.0:
3349; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3350; CGP-NEXT:    s_mov_b32 s6, 0xffffff
3351; CGP-NEXT:    v_mov_b32_e32 v1, 0
3352; CGP-NEXT:    v_and_b32_e32 v0, s6, v0
3353; CGP-NEXT:    v_and_b32_e32 v2, s6, v2
3354; CGP-NEXT:    v_and_b32_e32 v3, s6, v4
3355; CGP-NEXT:    v_and_b32_e32 v4, s6, v6
3356; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v0
3357; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
3358; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v2
3359; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v4
3360; CGP-NEXT:    v_rcp_f32_e32 v9, v6
3361; CGP-NEXT:    v_rcp_f32_e32 v10, v8
3362; CGP-NEXT:    v_mul_f32_e32 v9, v5, v9
3363; CGP-NEXT:    v_mul_f32_e32 v10, v7, v10
3364; CGP-NEXT:    v_trunc_f32_e32 v9, v9
3365; CGP-NEXT:    v_trunc_f32_e32 v10, v10
3366; CGP-NEXT:    v_mad_f32 v5, -v9, v6, v5
3367; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v9
3368; CGP-NEXT:    v_mad_f32 v7, -v10, v8, v7
3369; CGP-NEXT:    v_cvt_u32_f32_e32 v10, v10
3370; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v5|, v6
3371; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]
3372; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v7|, v8
3373; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
3374; CGP-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
3375; CGP-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
3376; CGP-NEXT:    v_mul_lo_u32 v3, v5, v3
3377; CGP-NEXT:    v_mul_lo_u32 v4, v6, v4
3378; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
3379; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
3380; CGP-NEXT:    v_and_b32_e32 v0, s6, v0
3381; CGP-NEXT:    v_and_b32_e32 v2, s6, v2
3382; CGP-NEXT:    v_mov_b32_e32 v3, v1
3383; CGP-NEXT:    s_setpc_b64 s[30:31]
3384  %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
3385  %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
3386  %result = urem <2 x i64> %num.mask, %den.mask
3387  ret <2 x i64> %result
3388}
3389