1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
4
5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
6
; Unsigned 64-bit division of two i64 arguments (no inreg attributes).
; The assertions for this function use only the prefix shared by both RUN
; lines, i.e. the same expected output is verified for both settings of
; -amdgpu-codegenprepare-disable-idiv-expansion.
; Shape of the expected code: v1 | v3 (presumably the high halves of %num and
; %den -- confirm against the calling convention) is tested against zero.
; Block %bb.1 handles the nonzero case with the full 64-bit sequence; block
; %bb.3 handles the zero case with a shorter 32-bit sequence on v0/v2 and sets
; the high result register (v5) to 0.
7define i64 @v_udiv_i64(i64 %num, i64 %den) {
8; CHECK-LABEL: v_udiv_i64:
9; CHECK:       ; %bb.0:
10; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; CHECK-NEXT:    v_or_b32_e32 v5, v1, v3
12; CHECK-NEXT:    v_mov_b32_e32 v4, 0
13; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
14; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
15; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
16; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
17; CHECK-NEXT:    s_cbranch_execz BB0_2
18; CHECK-NEXT:  ; %bb.1:
19; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
20; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
21; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
22; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v3, vcc
23; CHECK-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
24; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
25; CHECK-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
26; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
27; CHECK-NEXT:    v_trunc_f32_e32 v5, v5
28; CHECK-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
29; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
30; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
31; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v5
32; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v4
33; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v4
34; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v4
35; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
36; CHECK-NEXT:    v_mul_lo_u32 v10, v5, v9
37; CHECK-NEXT:    v_mul_hi_u32 v12, v4, v9
38; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v9
39; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
40; CHECK-NEXT:    v_mul_lo_u32 v11, v4, v8
41; CHECK-NEXT:    v_mul_lo_u32 v13, v5, v8
42; CHECK-NEXT:    v_mul_hi_u32 v14, v4, v8
43; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v8
44; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
45; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
46; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
47; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
48; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
49; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
50; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
51; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
52; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
53; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
54; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
55; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
56; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
57; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
58; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
59; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v5, v8, vcc
60; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v8
61; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v4
62; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v4
63; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v4
64; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v9
65; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
66; CHECK-NEXT:    v_mul_hi_u32 v12, v4, v8
67; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
68; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
69; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
70; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v6
71; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v6
72; CHECK-NEXT:    v_mul_hi_u32 v13, v4, v6
73; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v6
74; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
75; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
76; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
77; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
78; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
79; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
80; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
81; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
82; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
83; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
84; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
85; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
86; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
87; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
88; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
89; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
90; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
91; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v4
92; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v4
93; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
94; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v5
95; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v5
96; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v5
97; CHECK-NEXT:    v_mul_hi_u32 v5, v1, v5
98; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
99; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
100; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
101; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
102; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
103; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
104; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
105; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
106; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
107; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
108; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
109; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
110; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
111; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
112; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v4
113; CHECK-NEXT:    v_mul_hi_u32 v9, v2, v4
114; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
115; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v5
116; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
117; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v5, vcc
118; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
119; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
120; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
121; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
122; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
123; CHECK-NEXT:    v_subb_u32_e64 v7, s[4:5], v1, v6, vcc
124; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v6
125; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v2
126; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
127; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v3
128; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
129; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
130; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v3
131; CHECK-NEXT:    v_cndmask_b32_e32 v6, v9, v6, vcc
132; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
133; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
134; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
135; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
136; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
137; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
138; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
139; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
140; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
141; CHECK-NEXT:    v_cndmask_b32_e32 v0, v10, v8, vcc
142; CHECK-NEXT:    v_cndmask_b32_e32 v1, v11, v12, vcc
143; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
144; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
145; CHECK-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
146; CHECK-NEXT:    ; implicit-def: $vgpr2
147; CHECK-NEXT:    ; implicit-def: $vgpr0
148; CHECK-NEXT:  BB0_2: ; %Flow
149; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
150; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
151; CHECK-NEXT:    s_cbranch_execz BB0_4
152; CHECK-NEXT:  ; %bb.3:
153; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v2
154; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
155; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
156; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
157; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
158; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v1
159; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
160; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
161; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
162; CHECK-NEXT:    v_mul_lo_u32 v3, v1, v2
163; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v1
164; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
165; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
166; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
167; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v2
168; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
169; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
170; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
171; CHECK-NEXT:    v_cndmask_b32_e32 v4, v1, v3, vcc
172; CHECK-NEXT:    v_mov_b32_e32 v5, 0
173; CHECK-NEXT:  BB0_4:
174; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
175; CHECK-NEXT:    v_mov_b32_e32 v0, v4
176; CHECK-NEXT:    v_mov_b32_e32 v1, v5
177; CHECK-NEXT:    s_setpc_b64 s[30:31]
178  %result = udiv i64 %num, %den
179  ret i64 %result
180}
181
182; FIXME: This is a workaround for not handling uniform VGPR case.
; The intrinsic declared here is called by s_udiv_i64 on the two i32 elements
; of its udiv result before that result is returned.
183declare i32 @llvm.amdgcn.readfirstlane(i32)
184
; Unsigned 64-bit division in an amdgpu_ps function whose operands are both
; passed inreg. The udiv result is bitcast to <2 x i32>, each element is passed
; through llvm.amdgcn.readfirstlane, and a <2 x i32> built from those values is
; bitcast back to the returned i64.
; As in v_udiv_i64, the assertions use only the prefix shared by both RUN lines.
185define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
186; CHECK-LABEL: s_udiv_i64:
187; CHECK:       ; %bb.0:
188; CHECK-NEXT:    s_or_b64 s[6:7], s[0:1], s[2:3]
189; CHECK-NEXT:    s_mov_b32 s4, 0
190; CHECK-NEXT:    s_mov_b32 s5, -1
191; CHECK-NEXT:    s_and_b64 s[6:7], s[6:7], s[4:5]
192; CHECK-NEXT:    v_cmp_ne_u64_e64 vcc, s[6:7], 0
193; CHECK-NEXT:    s_cbranch_vccz BB1_2
194; CHECK-NEXT:  ; %bb.1:
195; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
196; CHECK-NEXT:    v_mov_b32_e32 v1, s3
197; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s3
198; CHECK-NEXT:    s_sub_u32 s6, 0, s2
199; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
200; CHECK-NEXT:    v_mov_b32_e32 v3, s1
201; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v2
202; CHECK-NEXT:    s_and_b32 s4, s4, 1
203; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
204; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
205; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
206; CHECK-NEXT:    s_subb_u32 s7, 0, s3
207; CHECK-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v0
208; CHECK-NEXT:    v_trunc_f32_e32 v2, v2
209; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
210; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
211; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
212; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v2
213; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v0
214; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
215; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
216; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
217; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v5
218; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v5
219; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v5
220; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
221; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v4
222; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v4
223; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
224; CHECK-NEXT:    v_mul_hi_u32 v4, v2, v4
225; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
226; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
227; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
228; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
229; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
230; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
231; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
232; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
233; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
234; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
235; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
236; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
237; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
238; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
239; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
240; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v2, v4, vcc
241; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v4
242; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v0
243; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
244; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
245; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
246; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
247; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
248; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
249; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
250; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
251; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v6
252; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
253; CHECK-NEXT:    v_mul_hi_u32 v11, v0, v6
254; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
255; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
256; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
257; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
258; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
259; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
260; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
261; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
262; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
263; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
264; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
265; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
266; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
267; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
268; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
269; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
270; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
271; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
272; CHECK-NEXT:    v_mul_lo_u32 v4, s1, v0
273; CHECK-NEXT:    v_mul_hi_u32 v5, s0, v0
274; CHECK-NEXT:    v_mul_hi_u32 v0, s1, v0
275; CHECK-NEXT:    v_mul_lo_u32 v6, s0, v2
276; CHECK-NEXT:    v_mul_lo_u32 v7, s1, v2
277; CHECK-NEXT:    v_mul_hi_u32 v8, s0, v2
278; CHECK-NEXT:    v_mul_hi_u32 v2, s1, v2
279; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
280; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
281; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
282; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
283; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
284; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
285; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
286; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
287; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
288; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
289; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
290; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
291; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
292; CHECK-NEXT:    v_mul_lo_u32 v5, s2, v0
293; CHECK-NEXT:    v_mul_lo_u32 v6, s3, v0
294; CHECK-NEXT:    v_mul_hi_u32 v7, s2, v0
295; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v0
296; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
297; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v8
298; CHECK-NEXT:    v_mul_lo_u32 v2, s2, v2
299; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
300; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
301; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, s0, v5
302; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v2, vcc
303; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], s1, v2
304; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v5
305; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
306; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s3, v3
307; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
308; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v2, v1, vcc
309; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v3
310; CHECK-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
311; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s2, v5
312; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
313; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v3
314; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
315; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s3, v1
316; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
317; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v1
318; CHECK-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
319; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
320; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v4, vcc
321; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
322; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
323; CHECK-NEXT:    s_mov_b32 s5, 0
324; CHECK-NEXT:    s_branch BB1_3
325; CHECK-NEXT:  BB1_2:
326; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
327; CHECK-NEXT:  BB1_3: ; %Flow
328; CHECK-NEXT:    s_xor_b32 s1, s5, -1
329; CHECK-NEXT:    s_and_b32 s1, s1, 1
330; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
331; CHECK-NEXT:    s_cbranch_scc1 BB1_5
332; CHECK-NEXT:  ; %bb.4:
333; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
334; CHECK-NEXT:    s_sub_i32 s1, 0, s2
335; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
336; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
337; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
338; CHECK-NEXT:    v_mul_lo_u32 v1, s1, v0
339; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
340; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
341; CHECK-NEXT:    v_mul_hi_u32 v0, s0, v0
342; CHECK-NEXT:    v_mul_lo_u32 v1, v0, s2
343; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
344; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
345; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
346; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
347; CHECK-NEXT:    v_subrev_i32_e64 v2, s[0:1], s2, v1
348; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
349; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
350; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
351; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
352; CHECK-NEXT:  BB1_5:
353; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
354; CHECK-NEXT:    s_mov_b32 s1, s0
355; CHECK-NEXT:    ; return to shader part epilog
356  %result = udiv i64 %num, %den
357  %cast = bitcast i64 %result to <2 x i32>
358  %elt.0 = extractelement <2 x i32> %cast, i32 0
359  %elt.1 = extractelement <2 x i32> %cast, i32 1
360  %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
361  %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
362  %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
  ; NOTE(review): element 1 below is filled with %res.0, so %res.1 is never
  ; used and the expected output ends by copying s0 into s1. This looks like a
  ; typo for %res.1 -- confirm intent; changing it requires regenerating the
  ; assertions with utils/update_llc_test_checks.py.
363  %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
364  %cast.back = bitcast <2 x i32> %ins.1 to i64
365  ret i64 %cast.back
366}
367
368define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
369; GISEL-LABEL: v_udiv_v2i64:
370; GISEL:       ; %bb.0:
371; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
373; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
374; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
375; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
376; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
377; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
378; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
379; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
380; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
381; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
382; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
383; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
384; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
385; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
386; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
387; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
388; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
389; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
390; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
391; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v13
392; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v12
393; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
394; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
395; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
396; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
397; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
398; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
399; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
400; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
401; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
402; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
403; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
404; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
405; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
406; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
407; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
408; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
409; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
410; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
411; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
412; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v9, v13, vcc
413; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
414; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v8
415; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
416; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v12
417; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v8
418; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
419; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
420; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v13
421; GISEL-NEXT:    v_mul_lo_u32 v14, v8, v10
422; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v13
423; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
424; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
425; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
426; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
427; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
428; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v10
429; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v13
430; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v10
431; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
432; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
433; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
434; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
435; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
436; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
437; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
438; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
439; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v10
440; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
441; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
442; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
443; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, 0, v9, s[4:5]
444; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v8
445; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
446; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v8
447; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
448; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
449; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
450; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
451; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
452; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
453; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
454; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
455; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
456; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
457; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
458; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
459; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
460; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
461; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
462; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
463; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
464; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
465; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v8
466; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
467; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v9
468; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
469; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
470; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
471; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
472; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], v1, v11, vcc
473; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v11
474; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
475; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
476; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
477; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
478; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v5
479; GISEL-NEXT:    v_cndmask_b32_e64 v10, v11, v12, s[4:5]
480; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v4
481; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
482; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
483; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v8
484; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v9, vcc
485; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
486; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
487; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
488; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
489; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v5
490; GISEL-NEXT:    v_cndmask_b32_e32 v0, v13, v0, vcc
491; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v11
492; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v12, vcc
493; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
494; GISEL-NEXT:    v_cndmask_b32_e32 v0, v11, v1, vcc
495; GISEL-NEXT:    v_cndmask_b32_e32 v1, v12, v4, vcc
496; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
497; GISEL-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
498; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
499; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v6
500; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v7
501; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
502; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
503; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
504; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
505; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
506; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
507; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
508; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
509; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
510; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
511; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
512; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
513; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v5
514; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v4
515; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
516; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
517; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v10
518; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v11
519; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v10
520; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
521; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
522; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
523; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
524; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
525; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v11
526; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
527; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
528; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
529; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
530; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
531; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
532; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
533; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
534; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
535; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
536; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
537; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
538; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
539; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v11, vcc
540; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v11
541; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v4
542; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v4
543; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v10
544; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
545; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
546; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
547; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v11
548; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
549; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v11
550; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
551; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
552; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
553; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
554; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
555; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
556; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v11
557; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
558; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
559; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
560; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
561; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
562; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
563; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
564; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
565; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
566; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
567; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
568; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v9
569; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
570; GISEL-NEXT:    v_addc_u32_e64 v5, vcc, 0, v5, s[4:5]
571; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
572; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
573; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
574; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
575; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
576; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
577; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
578; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
579; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
580; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
581; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v5
582; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
583; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
584; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
585; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
586; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
587; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
588; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
589; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
590; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
591; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
592; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
593; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v4
594; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v5
595; GISEL-NEXT:    v_mul_hi_u32 v11, v6, v4
596; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
597; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
598; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
599; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v9, vcc
600; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
601; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v7
602; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
603; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
604; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
605; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v7
606; GISEL-NEXT:    v_cndmask_b32_e64 v8, v9, v10, s[4:5]
607; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v6
608; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
609; GISEL-NEXT:    v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5]
610; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
611; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v5, vcc
612; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v7
613; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
614; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
615; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
616; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
617; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
618; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v9
619; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v10, vcc
620; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
621; GISEL-NEXT:    v_cndmask_b32_e32 v2, v9, v3, vcc
622; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v6, vcc
623; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
624; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
625; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
626; GISEL-NEXT:    s_setpc_b64 s[30:31]
627;
628; CGP-LABEL: v_udiv_v2i64:
629; CGP:       ; %bb.0:
630; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631; CGP-NEXT:    v_mov_b32_e32 v8, v0
632; CGP-NEXT:    v_mov_b32_e32 v9, v1
633; CGP-NEXT:    v_or_b32_e32 v1, v9, v5
634; CGP-NEXT:    v_mov_b32_e32 v0, 0
635; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
636; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
637; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
638; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
639; CGP-NEXT:    s_cbranch_execz BB2_2
640; CGP-NEXT:  ; %bb.1:
641; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
642; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v5
643; CGP-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
644; CGP-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
645; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
646; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
647; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
648; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
649; CGP-NEXT:    v_trunc_f32_e32 v1, v1
650; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
651; CGP-NEXT:    v_cvt_u32_f32_e32 v1, v1
652; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
653; CGP-NEXT:    v_mul_lo_u32 v12, v10, v1
654; CGP-NEXT:    v_mul_lo_u32 v13, v10, v0
655; CGP-NEXT:    v_mul_lo_u32 v14, v11, v0
656; CGP-NEXT:    v_mul_hi_u32 v15, v10, v0
657; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
658; CGP-NEXT:    v_mul_lo_u32 v14, v1, v13
659; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
660; CGP-NEXT:    v_mul_hi_u32 v13, v1, v13
661; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
662; CGP-NEXT:    v_mul_lo_u32 v15, v0, v12
663; CGP-NEXT:    v_mul_lo_u32 v17, v1, v12
664; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
665; CGP-NEXT:    v_mul_hi_u32 v12, v1, v12
666; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
667; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
668; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
669; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
670; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
671; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
672; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
673; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
674; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
675; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
676; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
677; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
678; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
679; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
680; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
681; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], v1, v12, vcc
682; CGP-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
683; CGP-NEXT:    v_mul_lo_u32 v12, v10, v0
684; CGP-NEXT:    v_mul_lo_u32 v11, v11, v0
685; CGP-NEXT:    v_mul_hi_u32 v14, v10, v0
686; CGP-NEXT:    v_mul_lo_u32 v10, v10, v13
687; CGP-NEXT:    v_mul_lo_u32 v15, v13, v12
688; CGP-NEXT:    v_mul_hi_u32 v16, v0, v12
689; CGP-NEXT:    v_mul_hi_u32 v12, v13, v12
690; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
691; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v14
692; CGP-NEXT:    v_mul_lo_u32 v11, v0, v10
693; CGP-NEXT:    v_mul_lo_u32 v14, v13, v10
694; CGP-NEXT:    v_mul_hi_u32 v17, v0, v10
695; CGP-NEXT:    v_mul_hi_u32 v10, v13, v10
696; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v15, v11
697; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
698; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
699; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
700; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
701; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
702; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
703; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
704; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
705; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v15
706; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
707; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
708; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
709; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
710; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v10, vcc
711; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v11
712; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
713; CGP-NEXT:    v_mul_lo_u32 v10, v9, v0
714; CGP-NEXT:    v_mul_hi_u32 v11, v8, v0
715; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
716; CGP-NEXT:    v_mul_lo_u32 v12, v8, v1
717; CGP-NEXT:    v_mul_lo_u32 v13, v9, v1
718; CGP-NEXT:    v_mul_hi_u32 v14, v8, v1
719; CGP-NEXT:    v_mul_hi_u32 v1, v9, v1
720; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
721; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
722; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
723; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
724; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
725; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
726; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
727; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
728; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
729; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
730; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
731; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
732; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
733; CGP-NEXT:    v_mul_lo_u32 v11, v4, v0
734; CGP-NEXT:    v_mul_lo_u32 v12, v5, v0
735; CGP-NEXT:    v_mul_hi_u32 v13, v4, v0
736; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v10
737; CGP-NEXT:    v_mul_lo_u32 v10, v4, v1
738; CGP-NEXT:    v_add_i32_e32 v14, vcc, 1, v0
739; CGP-NEXT:    v_addc_u32_e32 v15, vcc, 0, v1, vcc
740; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
741; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v14
742; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v15, vcc
743; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
744; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v8, v11
745; CGP-NEXT:    v_subb_u32_e64 v11, s[4:5], v9, v10, vcc
746; CGP-NEXT:    v_sub_i32_e64 v9, s[4:5], v9, v10
747; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v4
748; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
749; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v5
750; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
751; CGP-NEXT:    v_subb_u32_e32 v9, vcc, v9, v5, vcc
752; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v5
753; CGP-NEXT:    v_cndmask_b32_e32 v10, v13, v10, vcc
754; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v8, v4
755; CGP-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
756; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v4
757; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
758; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v5
759; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
760; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v5
761; CGP-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
762; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
763; CGP-NEXT:    v_cndmask_b32_e32 v4, v14, v12, vcc
764; CGP-NEXT:    v_cndmask_b32_e32 v5, v15, v16, vcc
765; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
766; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
767; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
768; CGP-NEXT:    ; implicit-def: $vgpr4
769; CGP-NEXT:    ; implicit-def: $vgpr8
770; CGP-NEXT:  BB2_2: ; %Flow2
771; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
772; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
773; CGP-NEXT:    s_cbranch_execz BB2_4
774; CGP-NEXT:  ; %bb.3:
775; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
776; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v4
777; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
778; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
779; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
780; CGP-NEXT:    v_mul_lo_u32 v1, v1, v0
781; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
782; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
783; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
784; CGP-NEXT:    v_mul_lo_u32 v1, v0, v4
785; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v0
786; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v8, v1
787; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
788; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
789; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v1, v4
790; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
791; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v0
792; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
793; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
794; CGP-NEXT:    v_mov_b32_e32 v1, 0
795; CGP-NEXT:  BB2_4:
796; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
797; CGP-NEXT:    v_or_b32_e32 v5, v3, v7
798; CGP-NEXT:    v_mov_b32_e32 v4, 0
799; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
800; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
801; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
802; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
803; CGP-NEXT:    s_cbranch_execz BB2_6
804; CGP-NEXT:  ; %bb.5:
805; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
806; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v7
807; CGP-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
808; CGP-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
809; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
810; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
811; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
812; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
813; CGP-NEXT:    v_trunc_f32_e32 v5, v5
814; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
815; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
816; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
817; CGP-NEXT:    v_mul_lo_u32 v10, v8, v5
818; CGP-NEXT:    v_mul_lo_u32 v11, v8, v4
819; CGP-NEXT:    v_mul_lo_u32 v12, v9, v4
820; CGP-NEXT:    v_mul_hi_u32 v13, v8, v4
821; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
822; CGP-NEXT:    v_mul_lo_u32 v12, v5, v11
823; CGP-NEXT:    v_mul_hi_u32 v14, v4, v11
824; CGP-NEXT:    v_mul_hi_u32 v11, v5, v11
825; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
826; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
827; CGP-NEXT:    v_mul_lo_u32 v15, v5, v10
828; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
829; CGP-NEXT:    v_mul_hi_u32 v10, v5, v10
830; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
831; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
832; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
833; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
834; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
835; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
836; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
837; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
838; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
839; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
840; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
841; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
842; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
843; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
844; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
845; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v5, v10, vcc
846; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v10
847; CGP-NEXT:    v_mul_lo_u32 v10, v8, v4
848; CGP-NEXT:    v_mul_lo_u32 v9, v9, v4
849; CGP-NEXT:    v_mul_hi_u32 v12, v8, v4
850; CGP-NEXT:    v_mul_lo_u32 v8, v8, v11
851; CGP-NEXT:    v_mul_lo_u32 v13, v11, v10
852; CGP-NEXT:    v_mul_hi_u32 v14, v4, v10
853; CGP-NEXT:    v_mul_hi_u32 v10, v11, v10
854; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
855; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
856; CGP-NEXT:    v_mul_lo_u32 v9, v4, v8
857; CGP-NEXT:    v_mul_lo_u32 v12, v11, v8
858; CGP-NEXT:    v_mul_hi_u32 v15, v4, v8
859; CGP-NEXT:    v_mul_hi_u32 v8, v11, v8
860; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v13, v9
861; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
862; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v10
863; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
864; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v14
865; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
866; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v15
867; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
868; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
869; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v13
870; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v9
871; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
872; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
873; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
874; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
875; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
876; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
877; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
878; CGP-NEXT:    v_mul_hi_u32 v9, v2, v4
879; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
880; CGP-NEXT:    v_mul_lo_u32 v10, v2, v5
881; CGP-NEXT:    v_mul_lo_u32 v11, v3, v5
882; CGP-NEXT:    v_mul_hi_u32 v12, v2, v5
883; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
884; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
885; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
886; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
887; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
888; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
889; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
890; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
891; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
892; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
893; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
894; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
895; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
896; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
897; CGP-NEXT:    v_mul_lo_u32 v9, v6, v4
898; CGP-NEXT:    v_mul_lo_u32 v10, v7, v4
899; CGP-NEXT:    v_mul_hi_u32 v11, v6, v4
900; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
901; CGP-NEXT:    v_mul_lo_u32 v8, v6, v5
902; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v4
903; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v5, vcc
904; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
905; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v12
906; CGP-NEXT:    v_addc_u32_e32 v14, vcc, 0, v13, vcc
907; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
908; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
909; CGP-NEXT:    v_subb_u32_e64 v9, s[4:5], v3, v8, vcc
910; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v8
911; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
912; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
913; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v7
914; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
915; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
916; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v7
917; CGP-NEXT:    v_cndmask_b32_e32 v8, v11, v8, vcc
918; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v6
919; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
920; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
921; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
922; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v7
923; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
924; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
925; CGP-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
926; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
927; CGP-NEXT:    v_cndmask_b32_e32 v2, v12, v10, vcc
928; CGP-NEXT:    v_cndmask_b32_e32 v3, v13, v14, vcc
929; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
930; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
931; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
932; CGP-NEXT:    ; implicit-def: $vgpr6
933; CGP-NEXT:    ; implicit-def: $vgpr2
934; CGP-NEXT:  BB2_6: ; %Flow
935; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
936; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
937; CGP-NEXT:    s_cbranch_execz BB2_8
938; CGP-NEXT:  ; %bb.7:
939; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v6
940; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v6
941; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
942; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
943; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
944; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
945; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
946; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
947; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
948; CGP-NEXT:    v_mul_lo_u32 v4, v3, v6
949; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
950; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
951; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
952; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
953; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v2, v6
954; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
955; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v3
956; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
957; CGP-NEXT:    v_cndmask_b32_e32 v4, v3, v4, vcc
958; CGP-NEXT:    v_mov_b32_e32 v5, 0
959; CGP-NEXT:  BB2_8:
960; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
961; CGP-NEXT:    v_mov_b32_e32 v2, v4
962; CGP-NEXT:    v_mov_b32_e32 v3, v5
963; CGP-NEXT:    s_setpc_b64 s[30:31]
964  %result = udiv <2 x i64> %num, %den
965  ret <2 x i64> %result
966}
967
; Scalar i64 udiv by the constant 4096 (0x1000, i.e. 2^12). All assertions use
; the common CHECK prefix, which both RUN lines enable, so the GISEL and CGP
; configurations are expected to emit identical code for this function.
; NOTE(review): the expected output below is the full reciprocal-based 64-bit
; expansion (v_rcp_iflag_f32 plus mul_lo/mul_hi refinement) and contains no
; shift instruction; presumably a power-of-two divisor could be lowered to a
; logical shift right by 12 -- confirm whether this is a known missed
; optimization before treating the checks as the intended codegen.
968define i64 @v_udiv_i64_pow2k_denom(i64 %num) {
969; CHECK-LABEL: v_udiv_i64_pow2k_denom:
970; CHECK:       ; %bb.0:
971; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
972; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x1000
973; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
974; CHECK-NEXT:    s_movk_i32 s6, 0xf000
975; CHECK-NEXT:    s_movk_i32 s7, 0x1000
976; CHECK-NEXT:    s_bfe_i32 s4, -1, 0x10000
977; CHECK-NEXT:    s_bfe_i32 s5, -1, 0x10000
978; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
979; CHECK-NEXT:    v_mov_b32_e32 v3, s4
980; CHECK-NEXT:    v_mov_b32_e32 v4, s5
981; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
982; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
983; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v2
984; CHECK-NEXT:    v_trunc_f32_e32 v5, v5
985; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v5
986; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
987; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
988; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v5
989; CHECK-NEXT:    v_mul_lo_u32 v7, s6, v2
990; CHECK-NEXT:    v_mul_lo_u32 v8, -1, v2
991; CHECK-NEXT:    v_mul_hi_u32 v9, s6, v2
992; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
993; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v7
994; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v7
995; CHECK-NEXT:    v_mul_hi_u32 v7, v5, v7
996; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
997; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v6
998; CHECK-NEXT:    v_mul_lo_u32 v11, v5, v6
999; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v6
1000; CHECK-NEXT:    v_mul_hi_u32 v6, v5, v6
1001; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
1002; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1003; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
1004; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1005; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1006; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1007; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
1008; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1009; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1010; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v11, v10
1011; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
1012; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1013; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1014; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1015; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
1016; CHECK-NEXT:    v_addc_u32_e64 v7, s[4:5], v5, v6, vcc
1017; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
1018; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v2
1019; CHECK-NEXT:    v_mul_lo_u32 v8, -1, v2
1020; CHECK-NEXT:    v_mul_hi_u32 v9, s6, v2
1021; CHECK-NEXT:    v_mul_lo_u32 v10, s6, v7
1022; CHECK-NEXT:    v_mul_lo_u32 v11, v7, v6
1023; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v6
1024; CHECK-NEXT:    v_mul_hi_u32 v6, v7, v6
1025; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
1026; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
1027; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v8
1028; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v8
1029; CHECK-NEXT:    v_mul_hi_u32 v13, v2, v8
1030; CHECK-NEXT:    v_mul_hi_u32 v7, v7, v8
1031; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v9
1032; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
1033; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v10, v6
1034; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
1035; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
1036; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
1037; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v13
1038; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1039; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
1040; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
1041; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
1042; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
1043; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
1044; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
1045; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v7, vcc
1046; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
1047; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
1048; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v2
1049; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v2
1050; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
1051; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v5
1052; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v5
1053; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v5
1054; CHECK-NEXT:    v_mul_hi_u32 v5, v1, v5
1055; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1056; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1057; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
1058; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1059; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
1060; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1061; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
1062; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1063; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
1064; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
1065; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
1066; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1067; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
1068; CHECK-NEXT:    v_mul_lo_u32 v7, s7, v2
1069; CHECK-NEXT:    v_mul_lo_u32 v8, 0, v2
1070; CHECK-NEXT:    v_mul_hi_u32 v9, s7, v2
1071; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
1072; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v5
1073; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v2
1074; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v5, vcc
1075; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
1076; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
1077; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
1078; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
1079; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
1080; CHECK-NEXT:    v_subb_u32_e64 v7, s[4:5], v1, v6, vcc
1081; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v6
1082; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
1083; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
1084; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v7
1085; CHECK-NEXT:    v_cndmask_b32_e64 v3, v3, v6, s[4:5]
1086; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1087; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s7, v0
1088; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1089; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s7, v0
1090; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
1091; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1092; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
1093; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
1094; CHECK-NEXT:    v_cndmask_b32_e32 v0, v10, v8, vcc
1095; CHECK-NEXT:    v_cndmask_b32_e32 v1, v11, v12, vcc
1096; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
1097; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1098; CHECK-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
1099; CHECK-NEXT:    s_setpc_b64 s[30:31]
  ; IR under test: unsigned divide of the single i64 argument by 4096.
1100  %result = udiv i64 %num, 4096
1101  ret i64 %result
1102}
1103
1104define <2 x i64> @v_udiv_v2i64_pow2k_denom(<2 x i64> %num) {
1105; GISEL-LABEL: v_udiv_v2i64_pow2k_denom:
1106; GISEL:       ; %bb.0:
1107; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1108; GISEL-NEXT:    s_movk_i32 s12, 0x1000
1109; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s12
1110; GISEL-NEXT:    s_sub_u32 s8, 0, s12
1111; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
1112; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
1113; GISEL-NEXT:    v_mov_b32_e32 v6, v4
1114; GISEL-NEXT:    s_and_b32 s4, s4, 1
1115; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
1116; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
1117; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
1118; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
1119; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
1120; GISEL-NEXT:    s_subb_u32 s9, 0, 0
1121; GISEL-NEXT:    s_bfe_i32 s10, -1, 0x10000
1122; GISEL-NEXT:    s_bfe_i32 s11, -1, 0x10000
1123; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
1124; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
1125; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
1126; GISEL-NEXT:    s_sub_u32 s13, 0, s12
1127; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
1128; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
1129; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
1130; GISEL-NEXT:    s_and_b32 s4, s4, 1
1131; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
1132; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
1133; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
1134; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
1135; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
1136; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
1137; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
1138; GISEL-NEXT:    s_subb_u32 s6, 0, 0
1139; GISEL-NEXT:    v_mul_lo_u32 v8, s13, v6
1140; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
1141; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v7
1142; GISEL-NEXT:    v_mul_lo_u32 v10, s13, v4
1143; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
1144; GISEL-NEXT:    v_mul_hi_u32 v12, s13, v4
1145; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
1146; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
1147; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
1148; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
1149; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
1150; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v10
1151; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
1152; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
1153; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v13
1154; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v13
1155; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v13
1156; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1157; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1158; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
1159; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v8
1160; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
1161; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
1162; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
1163; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
1164; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
1165; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1166; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v9
1167; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v9
1168; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
1169; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
1170; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1171; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
1172; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1173; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
1174; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1175; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
1176; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1177; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
1178; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1179; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1180; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
1181; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1182; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1183; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
1184; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
1185; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1186; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1187; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1188; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1189; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1190; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1191; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
1192; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
1193; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
1194; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
1195; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
1196; GISEL-NEXT:    v_mul_lo_u32 v11, s13, v4
1197; GISEL-NEXT:    v_mul_lo_u32 v12, s6, v4
1198; GISEL-NEXT:    v_mul_hi_u32 v14, s13, v4
1199; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
1200; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
1201; GISEL-NEXT:    v_mul_lo_u32 v15, s8, v5
1202; GISEL-NEXT:    v_mul_lo_u32 v16, s9, v5
1203; GISEL-NEXT:    v_mul_hi_u32 v17, s8, v5
1204; GISEL-NEXT:    v_mul_lo_u32 v18, s8, v13
1205; GISEL-NEXT:    v_mul_lo_u32 v19, v13, v15
1206; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v18
1207; GISEL-NEXT:    v_mul_hi_u32 v18, v5, v15
1208; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v17
1209; GISEL-NEXT:    v_mul_lo_u32 v17, v5, v16
1210; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], v19, v17
1211; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
1212; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], v17, v18
1213; GISEL-NEXT:    v_mul_lo_u32 v17, s13, v10
1214; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v11
1215; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v17
1216; GISEL-NEXT:    v_mul_hi_u32 v17, v4, v11
1217; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v14
1218; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v12
1219; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v18, v14
1220; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
1221; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v17
1222; GISEL-NEXT:    v_mov_b32_e32 v14, s10
1223; GISEL-NEXT:    v_mov_b32_e32 v17, s11
1224; GISEL-NEXT:    s_bfe_i32 s13, -1, 0x10000
1225; GISEL-NEXT:    s_bfe_i32 s14, -1, 0x10000
1226; GISEL-NEXT:    v_add_i32_e64 v6, s[10:11], v6, v8
1227; GISEL-NEXT:    v_mov_b32_e32 v8, s13
1228; GISEL-NEXT:    v_add_i32_e64 v7, s[10:11], v7, v9
1229; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v11
1230; GISEL-NEXT:    v_mul_hi_u32 v11, v13, v15
1231; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
1232; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v15
1233; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v12
1234; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v12
1235; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v12
1236; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v18, v9
1237; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
1238; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v9, v12
1239; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
1240; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v18, v12
1241; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
1242; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], v19, v18
1243; GISEL-NEXT:    v_mul_lo_u32 v19, v13, v16
1244; GISEL-NEXT:    v_mul_hi_u32 v13, v13, v16
1245; GISEL-NEXT:    v_mul_hi_u32 v16, v5, v16
1246; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v19, v11
1247; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
1248; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
1249; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
1250; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v19, v16
1251; GISEL-NEXT:    v_mov_b32_e32 v19, s14
1252; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
1253; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
1254; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v18
1255; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
1256; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v12, v15
1257; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v18
1258; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
1259; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v15
1260; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
1261; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, v7, v12, s[4:5]
1262; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1263; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
1264; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v4
1265; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
1266; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
1267; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
1268; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
1269; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v5
1270; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v5
1271; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
1272; GISEL-NEXT:    v_mul_lo_u32 v13, v2, v6
1273; GISEL-NEXT:    v_mul_lo_u32 v15, v3, v6
1274; GISEL-NEXT:    v_mul_hi_u32 v16, v2, v6
1275; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
1276; GISEL-NEXT:    v_mul_lo_u32 v18, v0, v7
1277; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
1278; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1279; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1280; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v7
1281; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v7
1282; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
1283; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
1284; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
1285; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v15, v4
1286; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1287; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v11, v5
1288; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1289; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
1290; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
1291; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v16
1292; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
1293; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1294; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
1295; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1296; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1297; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
1298; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v16
1299; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1300; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1301; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1302; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
1303; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1304; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1305; GISEL-NEXT:    v_mul_lo_u32 v10, s12, v4
1306; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v4
1307; GISEL-NEXT:    v_mul_hi_u32 v15, s12, v4
1308; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1309; GISEL-NEXT:    v_mul_lo_u32 v12, s12, v5
1310; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v5
1311; GISEL-NEXT:    v_mul_hi_u32 v18, s12, v5
1312; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
1313; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
1314; GISEL-NEXT:    v_mul_lo_u32 v9, s12, v6
1315; GISEL-NEXT:    v_mul_lo_u32 v11, s12, v7
1316; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1317; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
1318; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v4
1319; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
1320; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1321; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
1322; GISEL-NEXT:    v_add_i32_e32 v15, vcc, 1, v5
1323; GISEL-NEXT:    v_addc_u32_e32 v18, vcc, 0, v7, vcc
1324; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
1325; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v9, vcc
1326; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
1327; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s12, v2
1328; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
1329; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v12
1330; GISEL-NEXT:    v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5]
1331; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v11
1332; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
1333; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], 1, v13
1334; GISEL-NEXT:    v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9]
1335; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[6:7]
1336; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s12, v0
1337; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
1338; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v12
1339; GISEL-NEXT:    v_cndmask_b32_e64 v9, v14, v9, s[6:7]
1340; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], 1, v15
1341; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7]
1342; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1343; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
1344; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s12, v2
1345; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1346; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s12, v2
1347; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
1348; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s12, v0
1349; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1350; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s12, v0
1351; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
1352; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1353; GISEL-NEXT:    v_cndmask_b32_e32 v2, v19, v2, vcc
1354; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1355; GISEL-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
1356; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
1357; GISEL-NEXT:    v_cndmask_b32_e32 v1, v13, v10, vcc
1358; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
1359; GISEL-NEXT:    v_cndmask_b32_e64 v0, v15, v12, s[4:5]
1360; GISEL-NEXT:    v_cndmask_b32_e32 v3, v16, v11, vcc
1361; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
1362; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v1, vcc
1363; GISEL-NEXT:    v_cndmask_b32_e64 v1, v18, v14, s[4:5]
1364; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
1365; GISEL-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
1366; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
1367; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
1368; GISEL-NEXT:    s_setpc_b64 s[30:31]
1369;
1370; CGP-LABEL: v_udiv_v2i64_pow2k_denom:
1371; CGP:       ; %bb.0:
1372; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1373; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
1374; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
1375; CGP-NEXT:    s_movk_i32 s8, 0xf000
1376; CGP-NEXT:    s_movk_i32 s12, 0x1000
1377; CGP-NEXT:    s_bfe_i32 s10, -1, 0x10000
1378; CGP-NEXT:    s_bfe_i32 s11, -1, 0x10000
1379; CGP-NEXT:    s_bfe_i32 s13, -1, 0x10000
1380; CGP-NEXT:    s_bfe_i32 s14, -1, 0x10000
1381; CGP-NEXT:    v_mov_b32_e32 v6, v4
1382; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
1383; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
1384; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
1385; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
1386; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
1387; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
1388; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
1389; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
1390; CGP-NEXT:    v_trunc_f32_e32 v6, v6
1391; CGP-NEXT:    v_trunc_f32_e32 v7, v7
1392; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
1393; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
1394; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
1395; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
1396; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
1397; CGP-NEXT:    v_mul_lo_u32 v8, s8, v6
1398; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
1399; CGP-NEXT:    v_mul_lo_u32 v9, s8, v7
1400; CGP-NEXT:    v_mul_lo_u32 v10, s8, v4
1401; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
1402; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
1403; CGP-NEXT:    v_mul_lo_u32 v13, s8, v5
1404; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
1405; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
1406; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
1407; CGP-NEXT:    v_mul_lo_u32 v11, v6, v10
1408; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
1409; CGP-NEXT:    v_mul_hi_u32 v10, v6, v10
1410; CGP-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
1411; CGP-NEXT:    v_mul_lo_u32 v14, v7, v13
1412; CGP-NEXT:    v_mul_hi_u32 v17, v5, v13
1413; CGP-NEXT:    v_mul_hi_u32 v13, v7, v13
1414; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1415; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1416; CGP-NEXT:    v_mul_lo_u32 v12, v4, v8
1417; CGP-NEXT:    v_mul_lo_u32 v15, v6, v8
1418; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
1419; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
1420; CGP-NEXT:    v_mul_lo_u32 v19, v5, v9
1421; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
1422; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
1423; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1424; CGP-NEXT:    v_mul_lo_u32 v14, v7, v9
1425; CGP-NEXT:    v_mul_hi_u32 v17, v5, v9
1426; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
1427; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
1428; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1429; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
1430; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1431; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
1432; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1433; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
1434; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1435; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
1436; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1437; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1438; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
1439; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1440; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1441; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
1442; CGP-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
1443; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1444; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1445; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1446; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1447; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1448; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1449; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
1450; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
1451; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
1452; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
1453; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
1454; CGP-NEXT:    v_mul_lo_u32 v11, s8, v4
1455; CGP-NEXT:    v_mul_lo_u32 v12, -1, v4
1456; CGP-NEXT:    v_mul_hi_u32 v14, s8, v4
1457; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
1458; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
1459; CGP-NEXT:    v_mul_lo_u32 v15, s8, v5
1460; CGP-NEXT:    v_mul_lo_u32 v16, -1, v5
1461; CGP-NEXT:    v_mul_hi_u32 v17, s8, v5
1462; CGP-NEXT:    v_mul_lo_u32 v18, s8, v13
1463; CGP-NEXT:    v_mul_lo_u32 v19, v13, v15
1464; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v18
1465; CGP-NEXT:    v_mul_hi_u32 v18, v5, v15
1466; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v17
1467; CGP-NEXT:    v_mul_lo_u32 v17, v5, v16
1468; CGP-NEXT:    v_add_i32_e64 v17, s[6:7], v19, v17
1469; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
1470; CGP-NEXT:    v_add_i32_e64 v17, s[6:7], v17, v18
1471; CGP-NEXT:    v_mul_lo_u32 v17, s8, v10
1472; CGP-NEXT:    v_mul_lo_u32 v18, v10, v11
1473; CGP-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v17
1474; CGP-NEXT:    v_mul_hi_u32 v17, v4, v11
1475; CGP-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v14
1476; CGP-NEXT:    v_mul_lo_u32 v14, v4, v12
1477; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v18, v14
1478; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
1479; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v17
1480; CGP-NEXT:    v_mov_b32_e32 v14, s10
1481; CGP-NEXT:    v_mov_b32_e32 v17, s11
1482; CGP-NEXT:    v_add_i32_e64 v6, s[10:11], v6, v8
1483; CGP-NEXT:    v_mov_b32_e32 v8, s13
1484; CGP-NEXT:    v_add_i32_e64 v7, s[10:11], v7, v9
1485; CGP-NEXT:    v_mul_hi_u32 v9, v10, v11
1486; CGP-NEXT:    v_mul_hi_u32 v11, v13, v15
1487; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
1488; CGP-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v15
1489; CGP-NEXT:    v_mul_lo_u32 v18, v10, v12
1490; CGP-NEXT:    v_mul_hi_u32 v10, v10, v12
1491; CGP-NEXT:    v_mul_hi_u32 v12, v4, v12
1492; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v18, v9
1493; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
1494; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v9, v12
1495; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
1496; CGP-NEXT:    v_add_i32_e64 v12, s[8:9], v18, v12
1497; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
1498; CGP-NEXT:    v_add_i32_e64 v18, s[6:7], v19, v18
1499; CGP-NEXT:    v_mul_lo_u32 v19, v13, v16
1500; CGP-NEXT:    v_mul_hi_u32 v13, v13, v16
1501; CGP-NEXT:    v_mul_hi_u32 v16, v5, v16
1502; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v19, v11
1503; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
1504; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
1505; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
1506; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v19, v16
1507; CGP-NEXT:    v_mov_b32_e32 v19, s14
1508; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
1509; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
1510; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v18
1511; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
1512; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v12, v15
1513; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v18
1514; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
1515; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v15
1516; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
1517; CGP-NEXT:    v_addc_u32_e64 v7, vcc, v7, v12, s[4:5]
1518; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1519; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
1520; CGP-NEXT:    v_mul_lo_u32 v9, v3, v4
1521; CGP-NEXT:    v_mul_hi_u32 v10, v2, v4
1522; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
1523; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
1524; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
1525; CGP-NEXT:    v_mul_lo_u32 v11, v1, v5
1526; CGP-NEXT:    v_mul_hi_u32 v12, v0, v5
1527; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
1528; CGP-NEXT:    v_mul_lo_u32 v13, v2, v6
1529; CGP-NEXT:    v_mul_lo_u32 v15, v3, v6
1530; CGP-NEXT:    v_mul_hi_u32 v16, v2, v6
1531; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
1532; CGP-NEXT:    v_mul_lo_u32 v18, v0, v7
1533; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
1534; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1535; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1536; CGP-NEXT:    v_mul_lo_u32 v11, v1, v7
1537; CGP-NEXT:    v_mul_hi_u32 v12, v0, v7
1538; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
1539; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
1540; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
1541; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v15, v4
1542; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1543; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v11, v5
1544; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1545; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
1546; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
1547; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v16
1548; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
1549; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1550; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
1551; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1552; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1553; CGP-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
1554; CGP-NEXT:    v_add_i32_e32 v13, vcc, v18, v16
1555; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1556; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1557; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1558; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
1559; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1560; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1561; CGP-NEXT:    v_mul_lo_u32 v10, s12, v4
1562; CGP-NEXT:    v_mul_lo_u32 v13, 0, v4
1563; CGP-NEXT:    v_mul_hi_u32 v15, s12, v4
1564; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1565; CGP-NEXT:    v_mul_lo_u32 v12, s12, v5
1566; CGP-NEXT:    v_mul_lo_u32 v16, 0, v5
1567; CGP-NEXT:    v_mul_hi_u32 v18, s12, v5
1568; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
1569; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
1570; CGP-NEXT:    v_mul_lo_u32 v9, s12, v6
1571; CGP-NEXT:    v_mul_lo_u32 v11, s12, v7
1572; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1573; CGP-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
1574; CGP-NEXT:    v_add_i32_e32 v13, vcc, 1, v4
1575; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
1576; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1577; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
1578; CGP-NEXT:    v_add_i32_e32 v15, vcc, 1, v5
1579; CGP-NEXT:    v_addc_u32_e32 v18, vcc, 0, v7, vcc
1580; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
1581; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v9, vcc
1582; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
1583; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s12, v2
1584; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
1585; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v12
1586; CGP-NEXT:    v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5]
1587; CGP-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v11
1588; CGP-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
1589; CGP-NEXT:    v_add_i32_e64 v10, s[8:9], 1, v13
1590; CGP-NEXT:    v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9]
1591; CGP-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[6:7]
1592; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], s12, v0
1593; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
1594; CGP-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v12
1595; CGP-NEXT:    v_cndmask_b32_e64 v9, v14, v9, s[6:7]
1596; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], 1, v15
1597; CGP-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7]
1598; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1599; CGP-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
1600; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s12, v2
1601; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1602; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s12, v2
1603; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
1604; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s12, v0
1605; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1606; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s12, v0
1607; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
1608; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1609; CGP-NEXT:    v_cndmask_b32_e32 v2, v19, v2, vcc
1610; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1611; CGP-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
1612; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
1613; CGP-NEXT:    v_cndmask_b32_e32 v1, v13, v10, vcc
1614; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
1615; CGP-NEXT:    v_cndmask_b32_e64 v0, v15, v12, s[4:5]
1616; CGP-NEXT:    v_cndmask_b32_e32 v3, v16, v11, vcc
1617; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
1618; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v1, vcc
1619; CGP-NEXT:    v_cndmask_b32_e64 v1, v18, v14, s[4:5]
1620; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
1621; CGP-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
1622; CGP-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
1623; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
1624; CGP-NEXT:    s_setpc_b64 s[30:31]
1625  %result = udiv <2 x i64> %num, <i64 4096, i64 4096>
1626  ret <2 x i64> %result
1627}
1628
; Scalar i64 unsigned division of %num by the odd constant 1235195 (0x12d8fb).
; The expected code for this function is matched under the prefix shared by
; both RUN lines, so the two idiv-expansion settings must emit identical output.
; In that output 0x12d8fb is the divisor and 0xffed2705 is the low 32 bits of
; its two's-complement negation; the literal -1 multiplicand is presumably the
; matching high word of the negated 64-bit divisor (NOTE(review) - confirm).
; Do not hand-edit the assertion lines, regenerate them with
; utils/update_llc_test_checks.py instead.
1629define i64 @v_udiv_i64_oddk_denom(i64 %num) {
1630; CHECK-LABEL: v_udiv_i64_oddk_denom:
1631; CHECK:       ; %bb.0:
1632; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1633; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x12d8fb
1634; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
1635; CHECK-NEXT:    s_mov_b32 s6, 0xffed2705
1636; CHECK-NEXT:    s_mov_b32 s7, 0x12d8fb
1637; CHECK-NEXT:    s_bfe_i32 s4, -1, 0x10000
1638; CHECK-NEXT:    s_bfe_i32 s5, -1, 0x10000
1639; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
1640; CHECK-NEXT:    v_mov_b32_e32 v3, s4
1641; CHECK-NEXT:    v_mov_b32_e32 v4, s5
1642; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1643; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
1644; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v2
1645; CHECK-NEXT:    v_trunc_f32_e32 v5, v5
1646; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v5
1647; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
1648; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
1649; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v5
1650; CHECK-NEXT:    v_mul_lo_u32 v7, s6, v2
1651; CHECK-NEXT:    v_mul_lo_u32 v8, -1, v2
1652; CHECK-NEXT:    v_mul_hi_u32 v9, s6, v2
1653; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
1654; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v7
1655; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v7
1656; CHECK-NEXT:    v_mul_hi_u32 v7, v5, v7
1657; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
1658; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v6
1659; CHECK-NEXT:    v_mul_lo_u32 v11, v5, v6
1660; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v6
1661; CHECK-NEXT:    v_mul_hi_u32 v6, v5, v6
1662; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
1663; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1664; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
1665; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1666; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1667; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1668; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
1669; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1670; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1671; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v11, v10
1672; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
1673; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1674; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1675; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1676; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
1677; CHECK-NEXT:    v_addc_u32_e64 v7, s[4:5], v5, v6, vcc
1678; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
1679; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v2
1680; CHECK-NEXT:    v_mul_lo_u32 v8, -1, v2
1681; CHECK-NEXT:    v_mul_hi_u32 v9, s6, v2
1682; CHECK-NEXT:    v_mul_lo_u32 v10, s6, v7
1683; CHECK-NEXT:    v_mul_lo_u32 v11, v7, v6
1684; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v6
1685; CHECK-NEXT:    v_mul_hi_u32 v6, v7, v6
1686; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
1687; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
1688; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v8
1689; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v8
1690; CHECK-NEXT:    v_mul_hi_u32 v13, v2, v8
1691; CHECK-NEXT:    v_mul_hi_u32 v7, v7, v8
1692; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v9
1693; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
1694; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v10, v6
1695; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
1696; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
1697; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
1698; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v13
1699; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1700; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
1701; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
1702; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
1703; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
1704; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
1705; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
1706; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v7, vcc
1707; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
1708; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
1709; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v2
1710; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v2
1711; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
1712; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v5
1713; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v5
1714; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v5
1715; CHECK-NEXT:    v_mul_hi_u32 v5, v1, v5
1716; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1717; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1718; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
1719; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1720; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
1721; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1722; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
1723; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1724; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
1725; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
1726; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
1727; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1728; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
1729; CHECK-NEXT:    v_mul_lo_u32 v7, s7, v2
1730; CHECK-NEXT:    v_mul_lo_u32 v8, 0, v2
1731; CHECK-NEXT:    v_mul_hi_u32 v9, s7, v2
1732; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
1733; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v5
1734; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v2
1735; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v5, vcc
1736; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
1737; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
1738; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
1739; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
1740; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
1741; CHECK-NEXT:    v_subb_u32_e64 v7, s[4:5], v1, v6, vcc
1742; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v6
1743; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
1744; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
1745; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v7
1746; CHECK-NEXT:    v_cndmask_b32_e64 v3, v3, v6, s[4:5]
1747; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1748; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s7, v0
1749; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1750; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s7, v0
1751; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
1752; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1753; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
1754; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
1755; CHECK-NEXT:    v_cndmask_b32_e32 v0, v10, v8, vcc
1756; CHECK-NEXT:    v_cndmask_b32_e32 v1, v11, v12, vcc
1757; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
1758; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1759; CHECK-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
1760; CHECK-NEXT:    s_setpc_b64 s[30:31]
1761  %result = udiv i64 %num, 1235195
1762  ret i64 %result
1763}
1764
1765define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) {
1766; GISEL-LABEL: v_udiv_v2i64_oddk_denom:
1767; GISEL:       ; %bb.0:
1768; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1769; GISEL-NEXT:    s_mov_b32 s12, 0x12d8fb
1770; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s12
1771; GISEL-NEXT:    s_sub_u32 s8, 0, s12
1772; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
1773; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
1774; GISEL-NEXT:    v_mov_b32_e32 v6, v4
1775; GISEL-NEXT:    s_and_b32 s4, s4, 1
1776; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
1777; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
1778; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
1779; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
1780; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
1781; GISEL-NEXT:    s_subb_u32 s9, 0, 0
1782; GISEL-NEXT:    s_bfe_i32 s10, -1, 0x10000
1783; GISEL-NEXT:    s_bfe_i32 s11, -1, 0x10000
1784; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
1785; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
1786; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
1787; GISEL-NEXT:    s_sub_u32 s13, 0, s12
1788; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
1789; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
1790; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
1791; GISEL-NEXT:    s_and_b32 s4, s4, 1
1792; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
1793; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
1794; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
1795; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
1796; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
1797; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
1798; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
1799; GISEL-NEXT:    s_subb_u32 s6, 0, 0
1800; GISEL-NEXT:    v_mul_lo_u32 v8, s13, v6
1801; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
1802; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v7
1803; GISEL-NEXT:    v_mul_lo_u32 v10, s13, v4
1804; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
1805; GISEL-NEXT:    v_mul_hi_u32 v12, s13, v4
1806; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
1807; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
1808; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
1809; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
1810; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
1811; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v10
1812; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
1813; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
1814; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v13
1815; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v13
1816; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v13
1817; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1818; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1819; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
1820; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v8
1821; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
1822; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
1823; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
1824; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
1825; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
1826; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1827; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v9
1828; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v9
1829; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
1830; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
1831; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1832; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
1833; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1834; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
1835; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1836; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
1837; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1838; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
1839; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1840; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1841; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
1842; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1843; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1844; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
1845; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
1846; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1847; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1848; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1849; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1850; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1851; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1852; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
1853; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
1854; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
1855; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
1856; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
1857; GISEL-NEXT:    v_mul_lo_u32 v11, s13, v4
1858; GISEL-NEXT:    v_mul_lo_u32 v12, s6, v4
1859; GISEL-NEXT:    v_mul_hi_u32 v14, s13, v4
1860; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
1861; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
1862; GISEL-NEXT:    v_mul_lo_u32 v15, s8, v5
1863; GISEL-NEXT:    v_mul_lo_u32 v16, s9, v5
1864; GISEL-NEXT:    v_mul_hi_u32 v17, s8, v5
1865; GISEL-NEXT:    v_mul_lo_u32 v18, s8, v13
1866; GISEL-NEXT:    v_mul_lo_u32 v19, v13, v15
1867; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v18
1868; GISEL-NEXT:    v_mul_hi_u32 v18, v5, v15
1869; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v17
1870; GISEL-NEXT:    v_mul_lo_u32 v17, v5, v16
1871; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], v19, v17
1872; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
1873; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], v17, v18
1874; GISEL-NEXT:    v_mul_lo_u32 v17, s13, v10
1875; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v11
1876; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v17
1877; GISEL-NEXT:    v_mul_hi_u32 v17, v4, v11
1878; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v14
1879; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v12
1880; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v18, v14
1881; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
1882; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v17
1883; GISEL-NEXT:    v_mov_b32_e32 v14, s10
1884; GISEL-NEXT:    v_mov_b32_e32 v17, s11
1885; GISEL-NEXT:    s_bfe_i32 s13, -1, 0x10000
1886; GISEL-NEXT:    s_bfe_i32 s14, -1, 0x10000
1887; GISEL-NEXT:    v_add_i32_e64 v6, s[10:11], v6, v8
1888; GISEL-NEXT:    v_mov_b32_e32 v8, s13
1889; GISEL-NEXT:    v_add_i32_e64 v7, s[10:11], v7, v9
1890; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v11
1891; GISEL-NEXT:    v_mul_hi_u32 v11, v13, v15
1892; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
1893; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v15
1894; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v12
1895; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v12
1896; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v12
1897; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v18, v9
1898; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
1899; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v9, v12
1900; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
1901; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v18, v12
1902; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
1903; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], v19, v18
1904; GISEL-NEXT:    v_mul_lo_u32 v19, v13, v16
1905; GISEL-NEXT:    v_mul_hi_u32 v13, v13, v16
1906; GISEL-NEXT:    v_mul_hi_u32 v16, v5, v16
1907; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v19, v11
1908; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
1909; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
1910; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
1911; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v19, v16
1912; GISEL-NEXT:    v_mov_b32_e32 v19, s14
1913; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
1914; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
1915; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v18
1916; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
1917; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v12, v15
1918; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v18
1919; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
1920; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v15
1921; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
1922; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, v7, v12, s[4:5]
1923; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1924; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
1925; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v4
1926; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
1927; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
1928; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
1929; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
1930; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v5
1931; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v5
1932; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
1933; GISEL-NEXT:    v_mul_lo_u32 v13, v2, v6
1934; GISEL-NEXT:    v_mul_lo_u32 v15, v3, v6
1935; GISEL-NEXT:    v_mul_hi_u32 v16, v2, v6
1936; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
1937; GISEL-NEXT:    v_mul_lo_u32 v18, v0, v7
1938; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
1939; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1940; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1941; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v7
1942; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v7
1943; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
1944; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
1945; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
1946; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v15, v4
1947; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1948; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v11, v5
1949; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1950; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
1951; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
1952; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v16
1953; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
1954; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1955; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
1956; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1957; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1958; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
1959; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v16
1960; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1961; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1962; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1963; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
1964; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1965; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1966; GISEL-NEXT:    v_mul_lo_u32 v10, s12, v4
1967; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v4
1968; GISEL-NEXT:    v_mul_hi_u32 v15, s12, v4
1969; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1970; GISEL-NEXT:    v_mul_lo_u32 v12, s12, v5
1971; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v5
1972; GISEL-NEXT:    v_mul_hi_u32 v18, s12, v5
1973; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
1974; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
1975; GISEL-NEXT:    v_mul_lo_u32 v9, s12, v6
1976; GISEL-NEXT:    v_mul_lo_u32 v11, s12, v7
1977; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1978; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
1979; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v4
1980; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
1981; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1982; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
1983; GISEL-NEXT:    v_add_i32_e32 v15, vcc, 1, v5
1984; GISEL-NEXT:    v_addc_u32_e32 v18, vcc, 0, v7, vcc
1985; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
1986; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v9, vcc
1987; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
1988; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s12, v2
1989; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
1990; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v12
1991; GISEL-NEXT:    v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5]
1992; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v11
1993; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
1994; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], 1, v13
1995; GISEL-NEXT:    v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9]
1996; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[6:7]
1997; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s12, v0
1998; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
1999; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v12
2000; GISEL-NEXT:    v_cndmask_b32_e64 v9, v14, v9, s[6:7]
2001; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], 1, v15
2002; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7]
2003; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2004; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
2005; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s12, v2
2006; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2007; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s12, v2
2008; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
2009; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s12, v0
2010; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
2011; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s12, v0
2012; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
2013; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
2014; GISEL-NEXT:    v_cndmask_b32_e32 v2, v19, v2, vcc
2015; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
2016; GISEL-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
2017; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
2018; GISEL-NEXT:    v_cndmask_b32_e32 v1, v13, v10, vcc
2019; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
2020; GISEL-NEXT:    v_cndmask_b32_e64 v0, v15, v12, s[4:5]
2021; GISEL-NEXT:    v_cndmask_b32_e32 v3, v16, v11, vcc
2022; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
2023; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v1, vcc
2024; GISEL-NEXT:    v_cndmask_b32_e64 v1, v18, v14, s[4:5]
2025; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
2026; GISEL-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
2027; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
2028; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
2029; GISEL-NEXT:    s_setpc_b64 s[30:31]
2030;
2031; CGP-LABEL: v_udiv_v2i64_oddk_denom:
2032; CGP:       ; %bb.0:
2033; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2034; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
2035; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
2036; CGP-NEXT:    s_mov_b32 s8, 0xffed2705
2037; CGP-NEXT:    s_mov_b32 s12, 0x12d8fb
2038; CGP-NEXT:    s_bfe_i32 s10, -1, 0x10000
2039; CGP-NEXT:    s_bfe_i32 s11, -1, 0x10000
2040; CGP-NEXT:    s_bfe_i32 s13, -1, 0x10000
2041; CGP-NEXT:    s_bfe_i32 s14, -1, 0x10000
2042; CGP-NEXT:    v_mov_b32_e32 v6, v4
2043; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
2044; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
2045; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
2046; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
2047; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
2048; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
2049; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
2050; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
2051; CGP-NEXT:    v_trunc_f32_e32 v6, v6
2052; CGP-NEXT:    v_trunc_f32_e32 v7, v7
2053; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
2054; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
2055; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
2056; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
2057; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
2058; CGP-NEXT:    v_mul_lo_u32 v8, s8, v6
2059; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
2060; CGP-NEXT:    v_mul_lo_u32 v9, s8, v7
2061; CGP-NEXT:    v_mul_lo_u32 v10, s8, v4
2062; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
2063; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
2064; CGP-NEXT:    v_mul_lo_u32 v13, s8, v5
2065; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
2066; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
2067; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
2068; CGP-NEXT:    v_mul_lo_u32 v11, v6, v10
2069; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
2070; CGP-NEXT:    v_mul_hi_u32 v10, v6, v10
2071; CGP-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
2072; CGP-NEXT:    v_mul_lo_u32 v14, v7, v13
2073; CGP-NEXT:    v_mul_hi_u32 v17, v5, v13
2074; CGP-NEXT:    v_mul_hi_u32 v13, v7, v13
2075; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
2076; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
2077; CGP-NEXT:    v_mul_lo_u32 v12, v4, v8
2078; CGP-NEXT:    v_mul_lo_u32 v15, v6, v8
2079; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
2080; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
2081; CGP-NEXT:    v_mul_lo_u32 v19, v5, v9
2082; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
2083; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
2084; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
2085; CGP-NEXT:    v_mul_lo_u32 v14, v7, v9
2086; CGP-NEXT:    v_mul_hi_u32 v17, v5, v9
2087; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
2088; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
2089; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2090; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
2091; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2092; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
2093; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2094; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
2095; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2096; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
2097; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
2098; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
2099; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
2100; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
2101; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
2102; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
2103; CGP-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
2104; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
2105; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
2106; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2107; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
2108; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2109; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
2110; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
2111; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
2112; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
2113; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
2114; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
2115; CGP-NEXT:    v_mul_lo_u32 v11, s8, v4
2116; CGP-NEXT:    v_mul_lo_u32 v12, -1, v4
2117; CGP-NEXT:    v_mul_hi_u32 v14, s8, v4
2118; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
2119; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
2120; CGP-NEXT:    v_mul_lo_u32 v15, s8, v5
2121; CGP-NEXT:    v_mul_lo_u32 v16, -1, v5
2122; CGP-NEXT:    v_mul_hi_u32 v17, s8, v5
2123; CGP-NEXT:    v_mul_lo_u32 v18, s8, v13
2124; CGP-NEXT:    v_mul_lo_u32 v19, v13, v15
2125; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v18
2126; CGP-NEXT:    v_mul_hi_u32 v18, v5, v15
2127; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v17
2128; CGP-NEXT:    v_mul_lo_u32 v17, v5, v16
2129; CGP-NEXT:    v_add_i32_e64 v17, s[6:7], v19, v17
2130; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
2131; CGP-NEXT:    v_add_i32_e64 v17, s[6:7], v17, v18
2132; CGP-NEXT:    v_mul_lo_u32 v17, s8, v10
2133; CGP-NEXT:    v_mul_lo_u32 v18, v10, v11
2134; CGP-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v17
2135; CGP-NEXT:    v_mul_hi_u32 v17, v4, v11
2136; CGP-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v14
2137; CGP-NEXT:    v_mul_lo_u32 v14, v4, v12
2138; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v18, v14
2139; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
2140; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v17
2141; CGP-NEXT:    v_mov_b32_e32 v14, s10
2142; CGP-NEXT:    v_mov_b32_e32 v17, s11
2143; CGP-NEXT:    v_add_i32_e64 v6, s[10:11], v6, v8
2144; CGP-NEXT:    v_mov_b32_e32 v8, s13
2145; CGP-NEXT:    v_add_i32_e64 v7, s[10:11], v7, v9
2146; CGP-NEXT:    v_mul_hi_u32 v9, v10, v11
2147; CGP-NEXT:    v_mul_hi_u32 v11, v13, v15
2148; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
2149; CGP-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v15
2150; CGP-NEXT:    v_mul_lo_u32 v18, v10, v12
2151; CGP-NEXT:    v_mul_hi_u32 v10, v10, v12
2152; CGP-NEXT:    v_mul_hi_u32 v12, v4, v12
2153; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v18, v9
2154; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
2155; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v9, v12
2156; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
2157; CGP-NEXT:    v_add_i32_e64 v12, s[8:9], v18, v12
2158; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
2159; CGP-NEXT:    v_add_i32_e64 v18, s[6:7], v19, v18
2160; CGP-NEXT:    v_mul_lo_u32 v19, v13, v16
2161; CGP-NEXT:    v_mul_hi_u32 v13, v13, v16
2162; CGP-NEXT:    v_mul_hi_u32 v16, v5, v16
2163; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v19, v11
2164; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
2165; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
2166; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
2167; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v19, v16
2168; CGP-NEXT:    v_mov_b32_e32 v19, s14
2169; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
2170; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
2171; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v18
2172; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
2173; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v12, v15
2174; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v18
2175; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
2176; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v15
2177; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
2178; CGP-NEXT:    v_addc_u32_e64 v7, vcc, v7, v12, s[4:5]
2179; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
2180; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
2181; CGP-NEXT:    v_mul_lo_u32 v9, v3, v4
2182; CGP-NEXT:    v_mul_hi_u32 v10, v2, v4
2183; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
2184; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
2185; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
2186; CGP-NEXT:    v_mul_lo_u32 v11, v1, v5
2187; CGP-NEXT:    v_mul_hi_u32 v12, v0, v5
2188; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
2189; CGP-NEXT:    v_mul_lo_u32 v13, v2, v6
2190; CGP-NEXT:    v_mul_lo_u32 v15, v3, v6
2191; CGP-NEXT:    v_mul_hi_u32 v16, v2, v6
2192; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
2193; CGP-NEXT:    v_mul_lo_u32 v18, v0, v7
2194; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
2195; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
2196; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2197; CGP-NEXT:    v_mul_lo_u32 v11, v1, v7
2198; CGP-NEXT:    v_mul_hi_u32 v12, v0, v7
2199; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
2200; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
2201; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2202; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v15, v4
2203; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2204; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v11, v5
2205; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2206; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
2207; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
2208; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v16
2209; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
2210; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
2211; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
2212; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2213; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
2214; CGP-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
2215; CGP-NEXT:    v_add_i32_e32 v13, vcc, v18, v16
2216; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2217; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
2218; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2219; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
2220; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2221; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
2222; CGP-NEXT:    v_mul_lo_u32 v10, s12, v4
2223; CGP-NEXT:    v_mul_lo_u32 v13, 0, v4
2224; CGP-NEXT:    v_mul_hi_u32 v15, s12, v4
2225; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2226; CGP-NEXT:    v_mul_lo_u32 v12, s12, v5
2227; CGP-NEXT:    v_mul_lo_u32 v16, 0, v5
2228; CGP-NEXT:    v_mul_hi_u32 v18, s12, v5
2229; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
2230; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
2231; CGP-NEXT:    v_mul_lo_u32 v9, s12, v6
2232; CGP-NEXT:    v_mul_lo_u32 v11, s12, v7
2233; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
2234; CGP-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
2235; CGP-NEXT:    v_add_i32_e32 v13, vcc, 1, v4
2236; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
2237; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
2238; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
2239; CGP-NEXT:    v_add_i32_e32 v15, vcc, 1, v5
2240; CGP-NEXT:    v_addc_u32_e32 v18, vcc, 0, v7, vcc
2241; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
2242; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v9, vcc
2243; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
2244; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s12, v2
2245; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
2246; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v12
2247; CGP-NEXT:    v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5]
2248; CGP-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v11
2249; CGP-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
2250; CGP-NEXT:    v_add_i32_e64 v10, s[8:9], 1, v13
2251; CGP-NEXT:    v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9]
2252; CGP-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[6:7]
2253; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], s12, v0
2254; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
2255; CGP-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v12
2256; CGP-NEXT:    v_cndmask_b32_e64 v9, v14, v9, s[6:7]
2257; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], 1, v15
2258; CGP-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7]
2259; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2260; CGP-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
2261; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s12, v2
2262; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2263; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s12, v2
2264; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
2265; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s12, v0
2266; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
2267; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s12, v0
2268; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
2269; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
2270; CGP-NEXT:    v_cndmask_b32_e32 v2, v19, v2, vcc
2271; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
2272; CGP-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
2273; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
2274; CGP-NEXT:    v_cndmask_b32_e32 v1, v13, v10, vcc
2275; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
2276; CGP-NEXT:    v_cndmask_b32_e64 v0, v15, v12, s[4:5]
2277; CGP-NEXT:    v_cndmask_b32_e32 v3, v16, v11, vcc
2278; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
2279; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v1, vcc
2280; CGP-NEXT:    v_cndmask_b32_e64 v1, v18, v14, s[4:5]
2281; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
2282; CGP-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
2283; CGP-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
2284; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
2285; CGP-NEXT:    s_setpc_b64 s[30:31]
2286  %result = udiv <2 x i64> %num, <i64 1235195, i64 1235195>
2287  ret <2 x i64> %result
2288}
2289
; Scalar i64 udiv whose denominator is a variable power of two, 4096 << %y.
; The assertions below use the shared CHECK prefix, so the same code is
; expected from both RUN lines (i.e. with
; -amdgpu-codegenprepare-disable-idiv-expansion set to either 1 or 0).
;
; Shape of the expected code:
;   * %bb.0 materializes the denominator with v_lshl_b64, ORs the high dwords
;     of numerator and denominator, and splits exec on whether that is nonzero.
;   * %bb.1 (high bits set) is the full 64-bit division expansion.
;   * %bb.3 (high bits clear) is the 32-bit expansion on the low dwords; the
;     high dword of the quotient is set to 0.
; Note that the checks show the general division expansion, not a right shift.
2290define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
2291; CHECK-LABEL: v_udiv_i64_pow2_shl_denom:
2292; CHECK:       ; %bb.0:
2293; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2294; CHECK-NEXT:    s_mov_b64 s[4:5], 0x1000
2295; CHECK-NEXT:    v_lshl_b64 v[4:5], s[4:5], v2
2296; CHECK-NEXT:    v_or_b32_e32 v3, v1, v5
2297; CHECK-NEXT:    v_mov_b32_e32 v2, 0
2298; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
2299; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
2300; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2301; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
2302; CHECK-NEXT:    s_cbranch_execz BB7_2
2303; CHECK-NEXT:  ; %bb.1:
2304; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v4
2305; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v5
2306; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
2307; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v5, vcc
2308; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
2309; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
2310; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
2311; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
2312; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
2313; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
2314; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
2315; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
2316; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v3
2317; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v2
2318; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v2
2319; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v2
2320; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
2321; CHECK-NEXT:    v_mul_lo_u32 v10, v3, v9
2322; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v9
2323; CHECK-NEXT:    v_mul_hi_u32 v9, v3, v9
2324; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
2325; CHECK-NEXT:    v_mul_lo_u32 v11, v2, v8
2326; CHECK-NEXT:    v_mul_lo_u32 v13, v3, v8
2327; CHECK-NEXT:    v_mul_hi_u32 v14, v2, v8
2328; CHECK-NEXT:    v_mul_hi_u32 v8, v3, v8
2329; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
2330; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2331; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
2332; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2333; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2334; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2335; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
2336; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2337; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2338; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
2339; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
2340; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2341; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2342; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2343; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
2344; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v3, v8, vcc
2345; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v8
2346; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v2
2347; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v2
2348; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v2
2349; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v9
2350; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
2351; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v8
2352; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
2353; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
2354; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
2355; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v6
2356; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v6
2357; CHECK-NEXT:    v_mul_hi_u32 v13, v2, v6
2358; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v6
2359; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
2360; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
2361; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
2362; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
2363; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
2364; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
2365; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
2366; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2367; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
2368; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
2369; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
2370; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
2371; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
2372; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
2373; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
2374; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
2375; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2376; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v2
2377; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v2
2378; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
2379; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v3
2380; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v3
2381; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v3
2382; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
2383; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
2384; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2385; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
2386; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2387; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
2388; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2389; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
2390; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
2391; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
2392; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
2393; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
2394; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2395; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
2396; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v2
2397; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v2
2398; CHECK-NEXT:    v_mul_hi_u32 v9, v4, v2
2399; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
2400; CHECK-NEXT:    v_mul_lo_u32 v6, v4, v3
2401; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v2
2402; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v3, vcc
2403; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
2404; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
2405; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
2406; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
2407; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
2408; CHECK-NEXT:    v_subb_u32_e64 v7, s[4:5], v1, v6, vcc
2409; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v6
2410; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
2411; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
2412; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v5
2413; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
2414; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
2415; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v5
2416; CHECK-NEXT:    v_cndmask_b32_e32 v6, v9, v6, vcc
2417; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
2418; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
2419; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
2420; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
2421; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
2422; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
2423; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v5
2424; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
2425; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
2426; CHECK-NEXT:    v_cndmask_b32_e32 v0, v10, v8, vcc
2427; CHECK-NEXT:    v_cndmask_b32_e32 v1, v11, v12, vcc
2428; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
2429; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
2430; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
2431; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
2432; CHECK-NEXT:    ; implicit-def: $vgpr0
2433; CHECK-NEXT:  BB7_2: ; %Flow
2434; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
2435; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
2436; CHECK-NEXT:    s_cbranch_execz BB7_4
2437; CHECK-NEXT:  ; %bb.3:
2438; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v4
2439; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, 0, v4
2440; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
2441; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
2442; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
2443; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v1
2444; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
2445; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
2446; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
2447; CHECK-NEXT:    v_mul_lo_u32 v2, v1, v4
2448; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
2449; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
2450; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
2451; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2452; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], v0, v4
2453; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2454; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v1
2455; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
2456; CHECK-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
2457; CHECK-NEXT:    v_mov_b32_e32 v3, 0
2458; CHECK-NEXT:  BB7_4:
2459; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
2460; CHECK-NEXT:    v_mov_b32_e32 v0, v2
2461; CHECK-NEXT:    v_mov_b32_e32 v1, v3
2462; CHECK-NEXT:    s_setpc_b64 s[30:31]
  ; Test input: the denominator is built at run time as 4096 << %y, then used
  ; as the divisor of an unsigned 64-bit division of %x.
2463  %shl.y = shl i64 4096, %y
2464  %r = udiv i64 %x, %shl.y
2465  ret i64 %r
2466}
2467
2468define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
2469; GISEL-LABEL: v_udiv_v2i64_pow2_shl_denom:
2470; GISEL:       ; %bb.0:
2471; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2472; GISEL-NEXT:    s_mov_b64 s[4:5], 0x1000
2473; GISEL-NEXT:    v_lshl_b64 v[4:5], s[4:5], v4
2474; GISEL-NEXT:    v_lshl_b64 v[6:7], s[4:5], v6
2475; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
2476; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
2477; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
2478; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
2479; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
2480; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
2481; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
2482; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
2483; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
2484; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
2485; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
2486; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
2487; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
2488; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
2489; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
2490; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
2491; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2492; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
2493; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
2494; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v13
2495; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v12
2496; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
2497; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2498; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
2499; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2500; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
2501; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
2502; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
2503; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
2504; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
2505; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2506; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
2507; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
2508; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
2509; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
2510; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2511; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
2512; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
2513; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2514; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
2515; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v9, v13, vcc
2516; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
2517; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v8
2518; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
2519; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v12
2520; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v8
2521; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
2522; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
2523; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v13
2524; GISEL-NEXT:    v_mul_lo_u32 v14, v8, v10
2525; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v13
2526; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
2527; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2528; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
2529; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2530; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
2531; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v10
2532; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v13
2533; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v10
2534; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
2535; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2536; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
2537; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2538; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
2539; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
2540; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2541; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
2542; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v10
2543; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
2544; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
2545; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
2546; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, 0, v9, s[4:5]
2547; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v8
2548; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
2549; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v8
2550; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
2551; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2552; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2553; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2554; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2555; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
2556; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
2557; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
2558; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
2559; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2560; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
2561; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2562; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2563; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2564; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2565; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2566; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
2567; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
2568; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v8
2569; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
2570; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v9
2571; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
2572; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2573; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
2574; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
2575; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], v1, v11, vcc
2576; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v11
2577; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
2578; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
2579; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
2580; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
2581; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v5
2582; GISEL-NEXT:    v_cndmask_b32_e64 v10, v11, v12, s[4:5]
2583; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v4
2584; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
2585; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
2586; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v8
2587; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v9, vcc
2588; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
2589; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
2590; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
2591; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
2592; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v5
2593; GISEL-NEXT:    v_cndmask_b32_e32 v0, v13, v0, vcc
2594; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v11
2595; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v12, vcc
2596; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
2597; GISEL-NEXT:    v_cndmask_b32_e32 v0, v11, v1, vcc
2598; GISEL-NEXT:    v_cndmask_b32_e32 v1, v12, v4, vcc
2599; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
2600; GISEL-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
2601; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
2602; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v6
2603; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v7
2604; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
2605; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
2606; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
2607; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
2608; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
2609; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
2610; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
2611; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
2612; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
2613; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
2614; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
2615; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
2616; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v5
2617; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v4
2618; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2619; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
2620; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v10
2621; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v11
2622; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v10
2623; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
2624; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2625; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
2626; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2627; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2628; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v11
2629; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
2630; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
2631; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
2632; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2633; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
2634; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2635; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2636; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2637; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2638; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2639; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
2640; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2641; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
2642; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v11, vcc
2643; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v11
2644; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v4
2645; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v4
2646; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v10
2647; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
2648; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
2649; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
2650; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v11
2651; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
2652; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v11
2653; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
2654; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2655; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
2656; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
2657; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
2658; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
2659; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v11
2660; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
2661; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
2662; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2663; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
2664; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2665; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
2666; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
2667; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2668; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
2669; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
2670; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
2671; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v9
2672; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
2673; GISEL-NEXT:    v_addc_u32_e64 v5, vcc, 0, v5, s[4:5]
2674; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
2675; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
2676; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
2677; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
2678; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2679; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2680; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2681; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
2682; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
2683; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
2684; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v5
2685; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
2686; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2687; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
2688; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2689; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
2690; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
2691; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2692; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
2693; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
2694; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
2695; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
2696; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v4
2697; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v5
2698; GISEL-NEXT:    v_mul_hi_u32 v11, v6, v4
2699; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
2700; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
2701; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
2702; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v9, vcc
2703; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
2704; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v7
2705; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
2706; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
2707; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
2708; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v7
2709; GISEL-NEXT:    v_cndmask_b32_e64 v8, v9, v10, s[4:5]
2710; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v6
2711; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
2712; GISEL-NEXT:    v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5]
2713; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
2714; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v5, vcc
2715; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v7
2716; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
2717; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
2718; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
2719; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
2720; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
2721; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v9
2722; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v10, vcc
2723; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
2724; GISEL-NEXT:    v_cndmask_b32_e32 v2, v9, v3, vcc
2725; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v6, vcc
2726; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
2727; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
2728; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
2729; GISEL-NEXT:    s_setpc_b64 s[30:31]
2730;
2731; CGP-LABEL: v_udiv_v2i64_pow2_shl_denom:
2732; CGP:       ; %bb.0:
2733; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2734; CGP-NEXT:    v_mov_b32_e32 v5, v0
2735; CGP-NEXT:    v_mov_b32_e32 v7, v1
2736; CGP-NEXT:    s_mov_b64 s[4:5], 0x1000
2737; CGP-NEXT:    v_lshl_b64 v[10:11], s[4:5], v4
2738; CGP-NEXT:    v_lshl_b64 v[8:9], s[4:5], v6
2739; CGP-NEXT:    v_or_b32_e32 v1, v7, v11
2740; CGP-NEXT:    v_mov_b32_e32 v0, 0
2741; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
2742; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
2743; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2744; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
2745; CGP-NEXT:    s_cbranch_execz BB8_2
2746; CGP-NEXT:  ; %bb.1:
2747; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
2748; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v11
2749; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v10
2750; CGP-NEXT:    v_subb_u32_e32 v6, vcc, 0, v11, vcc
2751; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
2752; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2753; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
2754; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
2755; CGP-NEXT:    v_trunc_f32_e32 v1, v1
2756; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
2757; CGP-NEXT:    v_cvt_u32_f32_e32 v1, v1
2758; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
2759; CGP-NEXT:    v_mul_lo_u32 v12, v4, v1
2760; CGP-NEXT:    v_mul_lo_u32 v13, v4, v0
2761; CGP-NEXT:    v_mul_lo_u32 v14, v6, v0
2762; CGP-NEXT:    v_mul_hi_u32 v15, v4, v0
2763; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
2764; CGP-NEXT:    v_mul_lo_u32 v14, v1, v13
2765; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
2766; CGP-NEXT:    v_mul_hi_u32 v13, v1, v13
2767; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
2768; CGP-NEXT:    v_mul_lo_u32 v15, v0, v12
2769; CGP-NEXT:    v_mul_lo_u32 v17, v1, v12
2770; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
2771; CGP-NEXT:    v_mul_hi_u32 v12, v1, v12
2772; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
2773; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2774; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
2775; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
2776; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
2777; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2778; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
2779; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
2780; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
2781; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
2782; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2783; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2784; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
2785; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
2786; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
2787; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], v1, v12, vcc
2788; CGP-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
2789; CGP-NEXT:    v_mul_lo_u32 v12, v4, v0
2790; CGP-NEXT:    v_mul_lo_u32 v6, v6, v0
2791; CGP-NEXT:    v_mul_hi_u32 v14, v4, v0
2792; CGP-NEXT:    v_mul_lo_u32 v4, v4, v13
2793; CGP-NEXT:    v_mul_lo_u32 v15, v13, v12
2794; CGP-NEXT:    v_mul_hi_u32 v16, v0, v12
2795; CGP-NEXT:    v_mul_hi_u32 v12, v13, v12
2796; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v6, v4
2797; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v14
2798; CGP-NEXT:    v_mul_lo_u32 v6, v0, v4
2799; CGP-NEXT:    v_mul_lo_u32 v14, v13, v4
2800; CGP-NEXT:    v_mul_hi_u32 v17, v0, v4
2801; CGP-NEXT:    v_mul_hi_u32 v4, v13, v4
2802; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v15, v6
2803; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2804; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
2805; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2806; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v16
2807; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
2808; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
2809; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2810; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v13, v6
2811; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v15
2812; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v12, v6
2813; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2814; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
2815; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v12
2816; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
2817; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
2818; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2819; CGP-NEXT:    v_mul_lo_u32 v4, v7, v0
2820; CGP-NEXT:    v_mul_hi_u32 v6, v5, v0
2821; CGP-NEXT:    v_mul_hi_u32 v0, v7, v0
2822; CGP-NEXT:    v_mul_lo_u32 v12, v5, v1
2823; CGP-NEXT:    v_mul_lo_u32 v13, v7, v1
2824; CGP-NEXT:    v_mul_hi_u32 v14, v5, v1
2825; CGP-NEXT:    v_mul_hi_u32 v1, v7, v1
2826; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
2827; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2828; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
2829; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2830; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
2831; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2832; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
2833; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2834; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
2835; CGP-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
2836; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
2837; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2838; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
2839; CGP-NEXT:    v_mul_lo_u32 v6, v10, v0
2840; CGP-NEXT:    v_mul_lo_u32 v12, v11, v0
2841; CGP-NEXT:    v_mul_hi_u32 v13, v10, v0
2842; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
2843; CGP-NEXT:    v_mul_lo_u32 v4, v10, v1
2844; CGP-NEXT:    v_add_i32_e32 v14, vcc, 1, v0
2845; CGP-NEXT:    v_addc_u32_e32 v15, vcc, 0, v1, vcc
2846; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
2847; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v14
2848; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v15, vcc
2849; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
2850; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v5, v6
2851; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v7, v4, vcc
2852; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v7, v4
2853; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v10
2854; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
2855; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v11
2856; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
2857; CGP-NEXT:    v_subb_u32_e32 v4, vcc, v4, v11, vcc
2858; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v11
2859; CGP-NEXT:    v_cndmask_b32_e32 v6, v13, v7, vcc
2860; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v5, v10
2861; CGP-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
2862; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v10
2863; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
2864; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v11
2865; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
2866; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v11
2867; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v5, vcc
2868; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
2869; CGP-NEXT:    v_cndmask_b32_e32 v4, v14, v12, vcc
2870; CGP-NEXT:    v_cndmask_b32_e32 v5, v15, v16, vcc
2871; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
2872; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2873; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2874; CGP-NEXT:    ; implicit-def: $vgpr10_vgpr11
2875; CGP-NEXT:    ; implicit-def: $vgpr5
2876; CGP-NEXT:  BB8_2: ; %Flow2
2877; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
2878; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
2879; CGP-NEXT:    s_cbranch_execz BB8_4
2880; CGP-NEXT:  ; %bb.3:
2881; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
2882; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v10
2883; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2884; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2885; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
2886; CGP-NEXT:    v_mul_lo_u32 v1, v1, v0
2887; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
2888; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
2889; CGP-NEXT:    v_mul_hi_u32 v0, v5, v0
2890; CGP-NEXT:    v_mul_lo_u32 v1, v0, v10
2891; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
2892; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v5, v1
2893; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v10
2894; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2895; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v1, v10
2896; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
2897; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
2898; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v10
2899; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2900; CGP-NEXT:    v_mov_b32_e32 v1, 0
2901; CGP-NEXT:  BB8_4:
2902; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
2903; CGP-NEXT:    v_or_b32_e32 v5, v3, v9
2904; CGP-NEXT:    v_mov_b32_e32 v4, 0
2905; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
2906; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
2907; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2908; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
2909; CGP-NEXT:    s_cbranch_execz BB8_6
2910; CGP-NEXT:  ; %bb.5:
2911; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v8
2912; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v9
2913; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v8
2914; CGP-NEXT:    v_subb_u32_e32 v7, vcc, 0, v9, vcc
2915; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
2916; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
2917; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
2918; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
2919; CGP-NEXT:    v_trunc_f32_e32 v5, v5
2920; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
2921; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
2922; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
2923; CGP-NEXT:    v_mul_lo_u32 v10, v6, v5
2924; CGP-NEXT:    v_mul_lo_u32 v11, v6, v4
2925; CGP-NEXT:    v_mul_lo_u32 v12, v7, v4
2926; CGP-NEXT:    v_mul_hi_u32 v13, v6, v4
2927; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
2928; CGP-NEXT:    v_mul_lo_u32 v12, v5, v11
2929; CGP-NEXT:    v_mul_hi_u32 v14, v4, v11
2930; CGP-NEXT:    v_mul_hi_u32 v11, v5, v11
2931; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
2932; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
2933; CGP-NEXT:    v_mul_lo_u32 v15, v5, v10
2934; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
2935; CGP-NEXT:    v_mul_hi_u32 v10, v5, v10
2936; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
2937; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2938; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
2939; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2940; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
2941; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2942; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
2943; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2944; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2945; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
2946; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2947; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2948; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2949; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2950; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
2951; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v5, v10, vcc
2952; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v10
2953; CGP-NEXT:    v_mul_lo_u32 v10, v6, v4
2954; CGP-NEXT:    v_mul_lo_u32 v7, v7, v4
2955; CGP-NEXT:    v_mul_hi_u32 v12, v6, v4
2956; CGP-NEXT:    v_mul_lo_u32 v6, v6, v11
2957; CGP-NEXT:    v_mul_lo_u32 v13, v11, v10
2958; CGP-NEXT:    v_mul_hi_u32 v14, v4, v10
2959; CGP-NEXT:    v_mul_hi_u32 v10, v11, v10
2960; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
2961; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v12
2962; CGP-NEXT:    v_mul_lo_u32 v7, v4, v6
2963; CGP-NEXT:    v_mul_lo_u32 v12, v11, v6
2964; CGP-NEXT:    v_mul_hi_u32 v15, v4, v6
2965; CGP-NEXT:    v_mul_hi_u32 v6, v11, v6
2966; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v13, v7
2967; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2968; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v10
2969; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2970; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v14
2971; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
2972; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v15
2973; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2974; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
2975; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v13
2976; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
2977; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
2978; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
2979; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
2980; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
2981; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
2982; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2983; CGP-NEXT:    v_mul_lo_u32 v6, v3, v4
2984; CGP-NEXT:    v_mul_hi_u32 v7, v2, v4
2985; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
2986; CGP-NEXT:    v_mul_lo_u32 v10, v2, v5
2987; CGP-NEXT:    v_mul_lo_u32 v11, v3, v5
2988; CGP-NEXT:    v_mul_hi_u32 v12, v2, v5
2989; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
2990; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
2991; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2992; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
2993; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2994; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
2995; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2996; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
2997; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
2998; CGP-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
2999; CGP-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
3000; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
3001; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
3002; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
3003; CGP-NEXT:    v_mul_lo_u32 v7, v8, v4
3004; CGP-NEXT:    v_mul_lo_u32 v10, v9, v4
3005; CGP-NEXT:    v_mul_hi_u32 v11, v8, v4
3006; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
3007; CGP-NEXT:    v_mul_lo_u32 v6, v8, v5
3008; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v4
3009; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v5, vcc
3010; CGP-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
3011; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v12
3012; CGP-NEXT:    v_addc_u32_e32 v14, vcc, 0, v13, vcc
3013; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
3014; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v7
3015; CGP-NEXT:    v_subb_u32_e64 v7, s[4:5], v3, v6, vcc
3016; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v6
3017; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v8
3018; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
3019; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v9
3020; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
3021; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
3022; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v9
3023; CGP-NEXT:    v_cndmask_b32_e32 v6, v11, v6, vcc
3024; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
3025; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
3026; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v8
3027; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
3028; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v9
3029; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
3030; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v9
3031; CGP-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
3032; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
3033; CGP-NEXT:    v_cndmask_b32_e32 v2, v12, v10, vcc
3034; CGP-NEXT:    v_cndmask_b32_e32 v3, v13, v14, vcc
3035; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
3036; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
3037; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
3038; CGP-NEXT:    ; implicit-def: $vgpr8_vgpr9
3039; CGP-NEXT:    ; implicit-def: $vgpr2
3040; CGP-NEXT:  BB8_6: ; %Flow
3041; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
3042; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
3043; CGP-NEXT:    s_cbranch_execz BB8_8
3044; CGP-NEXT:  ; %bb.7:
3045; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v8
3046; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v8
3047; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
3048; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
3049; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
3050; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
3051; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
3052; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
3053; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
3054; CGP-NEXT:    v_mul_lo_u32 v4, v3, v8
3055; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
3056; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
3057; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v8
3058; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
3059; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v2, v8
3060; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
3061; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v3
3062; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v8
3063; CGP-NEXT:    v_cndmask_b32_e32 v4, v3, v4, vcc
3064; CGP-NEXT:    v_mov_b32_e32 v5, 0
3065; CGP-NEXT:  BB8_8:
3066; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
3067; CGP-NEXT:    v_mov_b32_e32 v2, v4
3068; CGP-NEXT:    v_mov_b32_e32 v3, v5
3069; CGP-NEXT:    s_setpc_b64 s[30:31]
3070  %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
3071  %r = udiv <2 x i64> %x, %shl.y
3072  ret <2 x i64> %r
3073}
3074
; Unsigned 64-bit division in which both operands are first masked to their
; low 24 bits (and with 16777215, i.e. 0xffffff).
;
; The two RUN configurations pin different expected code for this function:
;  * the GISEL checks expect a 32-bit integer reciprocal sequence
;    (v_rcp_iflag_f32, v_mul_lo_u32 / v_mul_hi_u32, then two compare-and-select
;    correction steps on the quotient);
;  * the CGP checks expect a float-based sequence (v_cvt_f32_u32 on both
;    operands, v_rcp_f32, v_trunc_f32, v_mad_f32, one compare-and-add
;    correction, then a final and with 0xffffff).
; In both cases the high half of the result (v1) is written as 0.
; NOTE(review): presumably the shorter CGP sequence is chosen because the
; operands are known to fit in 24 bits - confirm against AMDGPUCodeGenPrepare.
3075define i64 @v_udiv_i64_24bit(i64 %num, i64 %den) {
3076; GISEL-LABEL: v_udiv_i64_24bit:
3077; GISEL:       ; %bb.0:
3078; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3079; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
3080; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
3081; GISEL-NEXT:    v_and_b32_e32 v1, s4, v2
3082; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
3083; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
3084; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
3085; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
3086; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
3087; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
3088; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
3089; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
3090; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
3091; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
3092; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
3093; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
3094; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
3095; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
3096; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
3097; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3098; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
3099; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
3100; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
3101; GISEL-NEXT:    v_mov_b32_e32 v1, 0
3102; GISEL-NEXT:    s_setpc_b64 s[30:31]
3103;
3104; CGP-LABEL: v_udiv_i64_24bit:
3105; CGP:       ; %bb.0:
3106; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3107; CGP-NEXT:    s_mov_b32 s4, 0xffffff
3108; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
3109; CGP-NEXT:    v_and_b32_e32 v1, s4, v2
3110; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v0
3111; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v1
3112; CGP-NEXT:    v_rcp_f32_e32 v2, v1
3113; CGP-NEXT:    v_mul_f32_e32 v2, v0, v2
3114; CGP-NEXT:    v_trunc_f32_e32 v2, v2
3115; CGP-NEXT:    v_mad_f32 v0, -v2, v1, v0
3116; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
3117; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v0|, v1
3118; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
3119; CGP-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
3120; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
3121; CGP-NEXT:    v_mov_b32_e32 v1, 0
3122; CGP-NEXT:    s_setpc_b64 s[30:31]
  ; 16777215 == 0xffffff: clear everything above bit 23 of each operand so the
  ; udiv below only ever sees 24-bit values.
3123  %num.mask = and i64 %num, 16777215
3124  %den.mask = and i64 %den, 16777215
  ; The division itself is still expressed at i64 width; narrowing it is left
  ; to the compiler under test.
3125  %result = udiv i64 %num.mask, %den.mask
3126  ret i64 %result
3127}
3128
3129define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
3130; GISEL-LABEL: v_udiv_v2i64_24bit:
3131; GISEL:       ; %bb.0:
3132; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3133; GISEL-NEXT:    s_mov_b32 s6, 0xffffff
3134; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v1, 0
3135; GISEL-NEXT:    v_and_b32_e32 v3, s6, v4
3136; GISEL-NEXT:    v_and_b32_e32 v4, s6, v6
3137; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
3138; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v3
3139; GISEL-NEXT:    v_subb_u32_e64 v7, s[4:5], 0, 0, vcc
3140; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
3141; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v4
3142; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
3143; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v1
3144; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v1
3145; GISEL-NEXT:    v_rcp_iflag_f32_e32 v1, v5
3146; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v8
3147; GISEL-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
3148; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
3149; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v1
3150; GISEL-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v5
3151; GISEL-NEXT:    v_trunc_f32_e32 v8, v8
3152; GISEL-NEXT:    v_trunc_f32_e32 v11, v11
3153; GISEL-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v8
3154; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
3155; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v11
3156; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
3157; GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
3158; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v8
3159; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
3160; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v11
3161; GISEL-NEXT:    v_mul_lo_u32 v14, v6, v1
3162; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v1
3163; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v1
3164; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v5
3165; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v5
3166; GISEL-NEXT:    v_mul_hi_u32 v19, v9, v5
3167; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
3168; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v13
3169; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v17
3170; GISEL-NEXT:    v_mul_hi_u32 v18, v5, v17
3171; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v19
3172; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v13
3173; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v19
3174; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
3175; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
3176; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v14
3177; GISEL-NEXT:    v_mul_hi_u32 v18, v1, v14
3178; GISEL-NEXT:    v_mul_hi_u32 v14, v8, v14
3179; GISEL-NEXT:    v_mul_hi_u32 v17, v11, v17
3180; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v16
3181; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v12
3182; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v16
3183; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
3184; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
3185; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v12
3186; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
3187; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v18
3188; GISEL-NEXT:    v_mul_hi_u32 v18, v1, v12
3189; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
3190; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
3191; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v18
3192; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
3193; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
3194; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
3195; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
3196; GISEL-NEXT:    v_mul_lo_u32 v19, v11, v13
3197; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
3198; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v13
3199; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
3200; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
3201; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
3202; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v20, v19
3203; GISEL-NEXT:    s_bfe_i32 s10, -1, 0x10000
3204; GISEL-NEXT:    s_bfe_i32 s11, -1, 0x10000
3205; GISEL-NEXT:    s_bfe_i32 s12, -1, 0x10000
3206; GISEL-NEXT:    s_bfe_i32 s13, -1, 0x10000
3207; GISEL-NEXT:    v_and_b32_e32 v0, s6, v0
3208; GISEL-NEXT:    v_and_b32_e32 v2, s6, v2
3209; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v12
3210; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
3211; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
3212; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
3213; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
3214; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
3215; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
3216; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v18
3217; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
3218; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
3219; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
3220; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v8, v12, vcc
3221; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v1
3222; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v1
3223; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v17
3224; GISEL-NEXT:    v_addc_u32_e64 v16, s[6:7], v11, v13, s[4:5]
3225; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v5
3226; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v5
3227; GISEL-NEXT:    v_mul_hi_u32 v18, v9, v5
3228; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v16
3229; GISEL-NEXT:    v_mul_lo_u32 v19, v16, v17
3230; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v10, v9
3231; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v17
3232; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v18
3233; GISEL-NEXT:    v_mul_lo_u32 v18, v5, v9
3234; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], v19, v18
3235; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
3236; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v18, v10
3237; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v1
3238; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v14
3239; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v15
3240; GISEL-NEXT:    v_add_i32_e64 v6, s[8:9], v7, v6
3241; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v15
3242; GISEL-NEXT:    v_add_i32_e64 v6, s[8:9], v6, v10
3243; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v6
3244; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], v18, v10
3245; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
3246; GISEL-NEXT:    v_add_i32_e64 v7, s[8:9], v10, v7
3247; GISEL-NEXT:    v_mov_b32_e32 v7, s10
3248; GISEL-NEXT:    v_mov_b32_e32 v10, s11
3249; GISEL-NEXT:    v_add_i32_e64 v8, s[10:11], v8, v12
3250; GISEL-NEXT:    v_mov_b32_e32 v12, s12
3251; GISEL-NEXT:    v_add_i32_e64 v11, s[10:11], v11, v13
3252; GISEL-NEXT:    v_mul_hi_u32 v13, v14, v15
3253; GISEL-NEXT:    v_mul_hi_u32 v15, v16, v17
3254; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
3255; GISEL-NEXT:    v_add_i32_e64 v17, s[8:9], v18, v17
3256; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v6
3257; GISEL-NEXT:    v_mul_hi_u32 v14, v14, v6
3258; GISEL-NEXT:    v_mul_hi_u32 v6, v1, v6
3259; GISEL-NEXT:    v_add_i32_e64 v13, s[8:9], v18, v13
3260; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
3261; GISEL-NEXT:    v_add_i32_e64 v6, s[8:9], v13, v6
3262; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[8:9]
3263; GISEL-NEXT:    v_add_i32_e64 v13, s[8:9], v18, v13
3264; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
3265; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], v19, v18
3266; GISEL-NEXT:    v_mul_lo_u32 v19, v16, v9
3267; GISEL-NEXT:    v_mul_hi_u32 v16, v16, v9
3268; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
3269; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v19, v15
3270; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
3271; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v15, v9
3272; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
3273; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v19, v15
3274; GISEL-NEXT:    v_mov_b32_e32 v19, s13
3275; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v17
3276; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[6:7]
3277; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v18
3278; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
3279; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v13, v17
3280; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
3281; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v14, v13
3282; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v16, v15
3283; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v13, vcc
3284; GISEL-NEXT:    v_addc_u32_e64 v11, vcc, v11, v14, s[4:5]
3285; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
3286; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v8, vcc
3287; GISEL-NEXT:    v_mul_lo_u32 v8, 0, v1
3288; GISEL-NEXT:    v_mul_hi_u32 v13, v0, v1
3289; GISEL-NEXT:    v_mul_hi_u32 v1, 0, v1
3290; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
3291; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v11, vcc
3292; GISEL-NEXT:    v_mul_lo_u32 v11, 0, v5
3293; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v5
3294; GISEL-NEXT:    v_mul_hi_u32 v5, 0, v5
3295; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v6
3296; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v6
3297; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v6
3298; GISEL-NEXT:    v_mul_hi_u32 v6, 0, v6
3299; GISEL-NEXT:    v_mul_lo_u32 v18, v2, v9
3300; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
3301; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
3302; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v14
3303; GISEL-NEXT:    v_mul_lo_u32 v11, 0, v9
3304; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v9
3305; GISEL-NEXT:    v_mul_hi_u32 v9, 0, v9
3306; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v15
3307; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
3308; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v16, v1
3309; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
3310; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v11, v5
3311; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
3312; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
3313; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
3314; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v17
3315; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
3316; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
3317; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
3318; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
3319; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v15, v8
3320; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
3321; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v18, v17
3322; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v14
3323; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
3324; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
3325; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v15
3326; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
3327; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v13, v8
3328; GISEL-NEXT:    v_mul_lo_u32 v13, v3, v1
3329; GISEL-NEXT:    v_mul_lo_u32 v15, 0, v1
3330; GISEL-NEXT:    v_mul_hi_u32 v16, v3, v1
3331; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v14
3332; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v5
3333; GISEL-NEXT:    v_mul_lo_u32 v17, 0, v5
3334; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v5
3335; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
3336; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v11
3337; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v6
3338; GISEL-NEXT:    v_mul_lo_u32 v11, v4, v8
3339; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
3340; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v17, v11
3341; GISEL-NEXT:    v_add_i32_e32 v15, vcc, 1, v1
3342; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, 0, v6, vcc
3343; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v16
3344; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
3345; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v13
3346; GISEL-NEXT:    v_subb_u32_e64 v13, s[4:5], 0, v9, vcc
3347; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
3348; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[4:5]
3349; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
3350; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], 1, v5
3351; GISEL-NEXT:    v_addc_u32_e64 v18, s[6:7], 0, v8, s[6:7]
3352; GISEL-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v14
3353; GISEL-NEXT:    v_subb_u32_e64 v14, s[8:9], 0, v11, s[6:7]
3354; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v16, s[4:5]
3355; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
3356; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[4:5]
3357; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
3358; GISEL-NEXT:    v_cndmask_b32_e64 v12, v12, v16, s[4:5]
3359; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], 1, v15
3360; GISEL-NEXT:    v_addc_u32_e64 v16, s[4:5], 0, v17, s[4:5]
3361; GISEL-NEXT:    v_sub_i32_e64 v11, s[4:5], 0, v11
3362; GISEL-NEXT:    v_subbrev_u32_e64 v11, s[4:5], 0, v11, s[6:7]
3363; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v4
3364; GISEL-NEXT:    v_subbrev_u32_e64 v11, s[4:5], 0, v11, s[4:5]
3365; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
3366; GISEL-NEXT:    v_add_i32_e64 v2, s[6:7], 1, v13
3367; GISEL-NEXT:    v_addc_u32_e64 v4, s[6:7], 0, v18, s[6:7]
3368; GISEL-NEXT:    v_sub_i32_e64 v9, s[6:7], 0, v9
3369; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
3370; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
3371; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
3372; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
3373; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
3374; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[4:5]
3375; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
3376; GISEL-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
3377; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v11
3378; GISEL-NEXT:    v_cndmask_b32_e32 v3, v19, v3, vcc
3379; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3380; GISEL-NEXT:    v_cndmask_b32_e32 v0, v15, v14, vcc
3381; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v3
3382; GISEL-NEXT:    v_cndmask_b32_e64 v2, v13, v2, s[4:5]
3383; GISEL-NEXT:    v_cndmask_b32_e32 v3, v17, v16, vcc
3384; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
3385; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3386; GISEL-NEXT:    v_cndmask_b32_e64 v4, v18, v4, s[4:5]
3387; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v12
3388; GISEL-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[4:5]
3389; GISEL-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
3390; GISEL-NEXT:    v_cndmask_b32_e64 v3, v8, v4, s[4:5]
3391; GISEL-NEXT:    s_setpc_b64 s[30:31]
3392;
3393; CGP-LABEL: v_udiv_v2i64_24bit:
3394; CGP:       ; %bb.0:
3395; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3396; CGP-NEXT:    s_mov_b32 s6, 0xffffff
3397; CGP-NEXT:    v_and_b32_e32 v0, s6, v0
3398; CGP-NEXT:    v_and_b32_e32 v1, s6, v2
3399; CGP-NEXT:    v_and_b32_e32 v2, s6, v4
3400; CGP-NEXT:    v_and_b32_e32 v3, s6, v6
3401; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v0
3402; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v2
3403; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v1
3404; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v3
3405; CGP-NEXT:    v_rcp_f32_e32 v4, v2
3406; CGP-NEXT:    v_rcp_f32_e32 v5, v3
3407; CGP-NEXT:    v_mul_f32_e32 v4, v0, v4
3408; CGP-NEXT:    v_mul_f32_e32 v5, v1, v5
3409; CGP-NEXT:    v_trunc_f32_e32 v4, v4
3410; CGP-NEXT:    v_trunc_f32_e32 v5, v5
3411; CGP-NEXT:    v_mad_f32 v0, -v4, v2, v0
3412; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
3413; CGP-NEXT:    v_mad_f32 v1, -v5, v3, v1
3414; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
3415; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v0|, v2
3416; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
3417; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v1|, v3
3418; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
3419; CGP-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
3420; CGP-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
3421; CGP-NEXT:    v_and_b32_e32 v0, s6, v0
3422; CGP-NEXT:    v_and_b32_e32 v2, s6, v1
3423; CGP-NEXT:    v_mov_b32_e32 v1, 0
3424; CGP-NEXT:    v_mov_b32_e32 v3, 0
3425; CGP-NEXT:    s_setpc_b64 s[30:31]
3426  %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
3427  %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
3428  %result = udiv <2 x i64> %num.mask, %den.mask
3429  ret <2 x i64> %result
3430}
3431