1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
4
5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
6
; v_udiv_i64 - 64-bit unsigned divide with both operands in VGPRs (the expected
; code below reads %num from v[0:1] and %den from v[2:3]).
;
; Both RUN lines share one set of assertions for this function, so the expected
; code is the same with -amdgpu-codegenprepare-disable-idiv-expansion set to 0 or 1.
;
; Shape of the expected code
;   %bb.0  ORs the high dwords of %num and %den and compares the result against
;          zero to pick a path per lane (exec is masked with s_and_saveexec_b64).
;   %bb.1  lanes where either high dword is non-zero. Full 64-bit sequence that
;          starts from a v_rcp_iflag_f32 estimate of the denominator and continues
;          with 32-bit multiplies, carry-propagating adds and selects.
;   %bb.3  remaining lanes (both high dwords zero). Shorter 32-bit sequence on the
;          low dwords only, and the high result dword v1 is set to 0.
7define i64 @v_udiv_i64(i64 %num, i64 %den) {
8; CHECK-LABEL: v_udiv_i64:
9; CHECK:       ; %bb.0:
10; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; CHECK-NEXT:    v_mov_b32_e32 v4, v0
12; CHECK-NEXT:    v_mov_b32_e32 v5, v1
13; CHECK-NEXT:    v_or_b32_e32 v1, v5, v3
14; CHECK-NEXT:    v_mov_b32_e32 v0, 0
15; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
16; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
17; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
18; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
19; CHECK-NEXT:    s_cbranch_execz BB0_2
20; CHECK-NEXT:  ; %bb.1:
21; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v2
22; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v3
23; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
24; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v3, vcc
25; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
26; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
27; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
28; CHECK-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
29; CHECK-NEXT:    v_trunc_f32_e32 v1, v1
30; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
31; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
32; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
33; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v1
34; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v0
35; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v0
36; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v0
37; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
38; CHECK-NEXT:    v_mul_lo_u32 v10, v1, v9
39; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v9
40; CHECK-NEXT:    v_mul_hi_u32 v9, v1, v9
41; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
42; CHECK-NEXT:    v_mul_lo_u32 v11, v0, v8
43; CHECK-NEXT:    v_mul_lo_u32 v13, v1, v8
44; CHECK-NEXT:    v_mul_hi_u32 v14, v0, v8
45; CHECK-NEXT:    v_mul_hi_u32 v8, v1, v8
46; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
47; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
48; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
49; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
50; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
51; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
52; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
53; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
54; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
55; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
56; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
57; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
58; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
59; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
60; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
61; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v1, v8, vcc
62; CHECK-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v8
63; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v0
64; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v0
65; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v0
66; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v9
67; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
68; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v8
69; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
70; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
71; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
72; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v6
73; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v6
74; CHECK-NEXT:    v_mul_hi_u32 v13, v0, v6
75; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v6
76; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
77; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
78; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
79; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
80; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
81; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
82; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
83; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
84; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
85; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
86; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
87; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
88; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
89; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
90; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
91; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
92; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
93; CHECK-NEXT:    v_mul_lo_u32 v6, v5, v0
94; CHECK-NEXT:    v_mul_hi_u32 v7, v4, v0
95; CHECK-NEXT:    v_mul_hi_u32 v0, v5, v0
96; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v1
97; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v1
98; CHECK-NEXT:    v_mul_hi_u32 v10, v4, v1
99; CHECK-NEXT:    v_mul_hi_u32 v1, v5, v1
100; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
101; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
102; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
103; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
104; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
105; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
106; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
107; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
108; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
109; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
110; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
111; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
112; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
113; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v0
114; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v0
115; CHECK-NEXT:    v_mul_hi_u32 v9, v2, v0
116; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
117; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v1
118; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v0
119; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v1, vcc
120; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
121; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
122; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
123; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
124; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v4, v7
125; CHECK-NEXT:    v_subb_u32_e64 v7, s[4:5], v5, v6, vcc
126; CHECK-NEXT:    v_sub_i32_e64 v5, s[4:5], v5, v6
127; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v2
128; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
129; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v3
130; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
131; CHECK-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
132; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v3
133; CHECK-NEXT:    v_cndmask_b32_e32 v6, v9, v6, vcc
134; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v4, v2
135; CHECK-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
136; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v2
137; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
138; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v3
139; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
140; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v3
141; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
142; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
143; CHECK-NEXT:    v_cndmask_b32_e32 v2, v10, v8, vcc
144; CHECK-NEXT:    v_cndmask_b32_e32 v3, v11, v12, vcc
145; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
146; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
147; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
148; CHECK-NEXT:    ; implicit-def: $vgpr2
149; CHECK-NEXT:    ; implicit-def: $vgpr4
150; CHECK-NEXT:  BB0_2: ; %Flow
151; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
152; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
153; CHECK-NEXT:    s_cbranch_execz BB0_4
154; CHECK-NEXT:  ; %bb.3:
155; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v2
156; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
157; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
158; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
159; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
160; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v0
161; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
162; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
163; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
164; CHECK-NEXT:    v_mul_lo_u32 v1, v0, v2
165; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
166; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v4, v1
167; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
168; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
169; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v1, v2
170; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
171; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
172; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
173; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
174; CHECK-NEXT:    v_mov_b32_e32 v1, 0
175; CHECK-NEXT:  BB0_4:
176; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
177; CHECK-NEXT:    s_setpc_b64 s[30:31]
; The IR under test is a single plain 64-bit unsigned divide, returned directly.
178  %result = udiv i64 %num, %den
179  ret i64 %result
180}
181
182; FIXME: This is a workaround for not handling the uniform VGPR case.
; s_udiv_i64 below passes the halves of its quotient through this intrinsic
; before returning them.
183declare i32 @llvm.amdgcn.readfirstlane(i32)
184
; s_udiv_i64 - 64-bit unsigned divide in an amdgpu_ps function with both operands
; marked inreg (the expected code below reads %num from s[0:1] and %den from
; s[2:3]).
;
; Both RUN lines share one set of assertions for this function.
;
; Shape of the expected code
;   %bb.0  ORs the two 64-bit operands, masks the result down to its high dword
;          (s[4:5] holds 0 and -1) and takes a scalar branch on it being zero.
;   %bb.1  64-bit sequence, ending with s5 set to 0 before jumping to %Flow. Only
;          the low quotient dword (v0) is produced here.
;   %bb.4  32-bit sequence on the low dwords s0 and s2.
;   BB1_5  reads v0 back into s0 with v_readfirstlane_b32 and copies s0 to s1.
185define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
186; CHECK-LABEL: s_udiv_i64:
187; CHECK:       ; %bb.0:
188; CHECK-NEXT:    s_or_b64 s[6:7], s[0:1], s[2:3]
189; CHECK-NEXT:    s_mov_b32 s4, 0
190; CHECK-NEXT:    s_mov_b32 s5, -1
191; CHECK-NEXT:    s_and_b64 s[6:7], s[6:7], s[4:5]
192; CHECK-NEXT:    v_cmp_ne_u64_e64 vcc, s[6:7], 0
193; CHECK-NEXT:    s_cbranch_vccz BB1_2
194; CHECK-NEXT:  ; %bb.1:
195; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
196; CHECK-NEXT:    v_mov_b32_e32 v1, s3
197; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s3
198; CHECK-NEXT:    s_sub_u32 s6, 0, s2
199; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
200; CHECK-NEXT:    v_mov_b32_e32 v3, s1
201; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v2
202; CHECK-NEXT:    s_and_b32 s4, s4, 1
203; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
204; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
205; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
206; CHECK-NEXT:    s_subb_u32 s7, 0, s3
207; CHECK-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v0
208; CHECK-NEXT:    v_trunc_f32_e32 v2, v2
209; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
210; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
211; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
212; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v2
213; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v0
214; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
215; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
216; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
217; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v5
218; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v5
219; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v5
220; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
221; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v4
222; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v4
223; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
224; CHECK-NEXT:    v_mul_hi_u32 v4, v2, v4
225; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
226; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
227; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
228; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
229; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
230; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
231; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
232; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
233; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
234; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
235; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
236; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
237; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
238; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
239; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
240; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v2, v4, vcc
241; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v4
242; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v0
243; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
244; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
245; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
246; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
247; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
248; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
249; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
250; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
251; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v6
252; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
253; CHECK-NEXT:    v_mul_hi_u32 v11, v0, v6
254; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
255; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
256; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
257; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
258; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
259; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
260; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
261; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
262; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
263; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
264; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
265; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
266; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
267; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
268; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
269; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
270; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
271; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
272; CHECK-NEXT:    v_mul_lo_u32 v4, s1, v0
273; CHECK-NEXT:    v_mul_hi_u32 v5, s0, v0
274; CHECK-NEXT:    v_mul_hi_u32 v0, s1, v0
275; CHECK-NEXT:    v_mul_lo_u32 v6, s0, v2
276; CHECK-NEXT:    v_mul_lo_u32 v7, s1, v2
277; CHECK-NEXT:    v_mul_hi_u32 v8, s0, v2
278; CHECK-NEXT:    v_mul_hi_u32 v2, s1, v2
279; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
280; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
281; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
282; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
283; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
284; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
285; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
286; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
287; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
288; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
289; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
290; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
291; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
292; CHECK-NEXT:    v_mul_lo_u32 v5, s2, v0
293; CHECK-NEXT:    v_mul_lo_u32 v6, s3, v0
294; CHECK-NEXT:    v_mul_hi_u32 v7, s2, v0
295; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v0
296; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
297; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v8
298; CHECK-NEXT:    v_mul_lo_u32 v2, s2, v2
299; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
300; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
301; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, s0, v5
302; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v2, vcc
303; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], s1, v2
304; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v5
305; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
306; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s3, v3
307; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
308; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v2, v1, vcc
309; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v3
310; CHECK-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
311; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s2, v5
312; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
313; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v3
314; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
315; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s3, v1
316; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
317; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v1
318; CHECK-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
319; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
320; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v4, vcc
321; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
322; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
323; CHECK-NEXT:    s_mov_b32 s5, 0
324; CHECK-NEXT:    s_branch BB1_3
325; CHECK-NEXT:  BB1_2:
326; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
327; CHECK-NEXT:  BB1_3: ; %Flow
328; CHECK-NEXT:    s_xor_b32 s1, s5, -1
329; CHECK-NEXT:    s_and_b32 s1, s1, 1
330; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
331; CHECK-NEXT:    s_cbranch_scc1 BB1_5
332; CHECK-NEXT:  ; %bb.4:
333; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
334; CHECK-NEXT:    s_sub_i32 s1, 0, s2
335; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
336; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
337; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
338; CHECK-NEXT:    v_mul_lo_u32 v1, s1, v0
339; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
340; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
341; CHECK-NEXT:    v_mul_hi_u32 v0, s0, v0
342; CHECK-NEXT:    v_mul_lo_u32 v1, v0, s2
343; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
344; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
345; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
346; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
347; CHECK-NEXT:    v_subrev_i32_e64 v2, s[0:1], s2, v1
348; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
349; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
350; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
351; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
352; CHECK-NEXT:  BB1_5:
353; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
354; CHECK-NEXT:    s_mov_b32 s1, s0
355; CHECK-NEXT:    ; return to shader part epilog
; Divide, split the i64 quotient into two i32 halves, pass each half through
; readfirstlane, then rebuild an i64 to return.
356  %result = udiv i64 %num, %den
357  %cast = bitcast i64 %result to <2 x i32>
358  %elt.0 = extractelement <2 x i32> %cast, i32 0
359  %elt.1 = extractelement <2 x i32> %cast, i32 1
360  %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
361  %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
362  %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
; NOTE(review): element 1 is filled with %res.0, so %res.1 is unused and the
; returned high dword is a copy of the low dword (the expected code above ends
; with "s_mov_b32 s1, s0"). This looks like a typo for %res.1 - confirm. Fixing
; it means the high half of the quotient gets computed, so the assertions must
; be regenerated with utils/update_llc_test_checks.py in the same change.
363  %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
364  %cast.back = bitcast <2 x i32> %ins.1 to i64
365  ret i64 %cast.back
366}
367
368define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
369; GISEL-LABEL: v_udiv_v2i64:
370; GISEL:       ; %bb.0:
371; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
373; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
374; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
375; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
376; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
377; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
378; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
379; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
380; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
381; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
382; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
383; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
384; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
385; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
386; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
387; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
388; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
389; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
390; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
391; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v13
392; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v12
393; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
394; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
395; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
396; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
397; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
398; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
399; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
400; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
401; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
402; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
403; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
404; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
405; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
406; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
407; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
408; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
409; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
410; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
411; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
412; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v9, v13, vcc
413; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
414; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v8
415; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
416; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v12
417; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v8
418; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
419; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
420; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v13
421; GISEL-NEXT:    v_mul_lo_u32 v14, v8, v10
422; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v13
423; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
424; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
425; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
426; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
427; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
428; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v10
429; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v13
430; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v10
431; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
432; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
433; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
434; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
435; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
436; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
437; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
438; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
439; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v10
440; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
441; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
442; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
443; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, 0, v9, s[4:5]
444; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v8
445; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
446; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v8
447; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
448; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
449; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
450; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
451; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
452; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
453; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
454; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
455; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
456; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
457; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
458; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
459; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
460; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
461; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
462; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
463; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
464; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
465; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v8
466; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
467; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v9
468; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
469; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
470; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
471; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
472; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], v1, v11, vcc
473; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v11
474; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
475; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
476; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
477; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
478; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v5
479; GISEL-NEXT:    v_cndmask_b32_e64 v10, v11, v12, s[4:5]
480; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v4
481; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
482; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
483; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v8
484; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v9, vcc
485; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
486; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
487; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
488; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
489; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v5
490; GISEL-NEXT:    v_cndmask_b32_e32 v0, v13, v0, vcc
491; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v11
492; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v12, vcc
493; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
494; GISEL-NEXT:    v_cndmask_b32_e32 v0, v11, v1, vcc
495; GISEL-NEXT:    v_cndmask_b32_e32 v1, v12, v4, vcc
496; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
497; GISEL-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
498; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
499; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v6
500; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v7
501; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
502; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
503; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
504; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
505; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
506; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
507; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
508; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
509; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
510; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
511; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
512; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
513; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v5
514; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v4
515; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
516; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
517; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v10
518; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v11
519; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v10
520; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
521; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
522; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
523; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
524; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
525; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v11
526; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
527; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
528; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
529; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
530; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
531; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
532; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
533; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
534; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
535; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
536; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
537; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
538; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
539; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v11, vcc
540; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v11
541; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v4
542; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v4
543; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v10
544; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
545; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
546; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
547; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v11
548; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
549; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v11
550; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
551; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
552; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
553; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
554; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
555; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
556; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v11
557; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
558; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
559; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
560; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
561; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
562; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
563; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
564; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
565; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
566; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
567; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
568; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v9
569; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
570; GISEL-NEXT:    v_addc_u32_e64 v5, vcc, 0, v5, s[4:5]
571; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
572; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
573; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
574; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
575; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
576; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
577; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
578; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
579; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
580; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
581; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v5
582; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
583; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
584; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
585; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
586; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
587; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
588; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
589; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
590; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
591; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
592; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
593; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v4
594; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v5
595; GISEL-NEXT:    v_mul_hi_u32 v11, v6, v4
596; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
597; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
598; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
599; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v9, vcc
600; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
601; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v7
602; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
603; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
604; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
605; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v7
606; GISEL-NEXT:    v_cndmask_b32_e64 v8, v9, v10, s[4:5]
607; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v6
608; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
609; GISEL-NEXT:    v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5]
610; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
611; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v5, vcc
612; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v7
613; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
614; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
615; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
616; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
617; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
618; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v9
619; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v10, vcc
620; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
621; GISEL-NEXT:    v_cndmask_b32_e32 v2, v9, v3, vcc
622; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v6, vcc
623; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
624; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
625; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
626; GISEL-NEXT:    s_setpc_b64 s[30:31]
627;
628; CGP-LABEL: v_udiv_v2i64:
629; CGP:       ; %bb.0:
630; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631; CGP-NEXT:    v_mov_b32_e32 v10, v0
632; CGP-NEXT:    v_mov_b32_e32 v11, v1
633; CGP-NEXT:    v_mov_b32_e32 v8, v2
634; CGP-NEXT:    v_mov_b32_e32 v9, v3
635; CGP-NEXT:    v_or_b32_e32 v1, v11, v5
636; CGP-NEXT:    v_mov_b32_e32 v0, 0
637; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
638; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
639; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
640; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
641; CGP-NEXT:    s_cbranch_execz BB2_2
642; CGP-NEXT:  ; %bb.1:
643; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
644; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v5
645; CGP-NEXT:    v_sub_i32_e32 v2, vcc, 0, v4
646; CGP-NEXT:    v_subb_u32_e32 v3, vcc, 0, v5, vcc
647; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
648; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
649; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
650; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
651; CGP-NEXT:    v_trunc_f32_e32 v1, v1
652; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
653; CGP-NEXT:    v_cvt_u32_f32_e32 v1, v1
654; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
655; CGP-NEXT:    v_mul_lo_u32 v12, v2, v1
656; CGP-NEXT:    v_mul_lo_u32 v13, v2, v0
657; CGP-NEXT:    v_mul_lo_u32 v14, v3, v0
658; CGP-NEXT:    v_mul_hi_u32 v15, v2, v0
659; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
660; CGP-NEXT:    v_mul_lo_u32 v14, v1, v13
661; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
662; CGP-NEXT:    v_mul_hi_u32 v13, v1, v13
663; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
664; CGP-NEXT:    v_mul_lo_u32 v15, v0, v12
665; CGP-NEXT:    v_mul_lo_u32 v17, v1, v12
666; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
667; CGP-NEXT:    v_mul_hi_u32 v12, v1, v12
668; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
669; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
670; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
671; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
672; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
673; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
674; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
675; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
676; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
677; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
678; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
679; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
680; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
681; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
682; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
683; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], v1, v12, vcc
684; CGP-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
685; CGP-NEXT:    v_mul_lo_u32 v12, v2, v0
686; CGP-NEXT:    v_mul_lo_u32 v3, v3, v0
687; CGP-NEXT:    v_mul_hi_u32 v14, v2, v0
688; CGP-NEXT:    v_mul_lo_u32 v2, v2, v13
689; CGP-NEXT:    v_mul_lo_u32 v15, v13, v12
690; CGP-NEXT:    v_mul_hi_u32 v16, v0, v12
691; CGP-NEXT:    v_mul_hi_u32 v12, v13, v12
692; CGP-NEXT:    v_add_i32_e64 v2, s[4:5], v3, v2
693; CGP-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v14
694; CGP-NEXT:    v_mul_lo_u32 v3, v0, v2
695; CGP-NEXT:    v_mul_lo_u32 v14, v13, v2
696; CGP-NEXT:    v_mul_hi_u32 v17, v0, v2
697; CGP-NEXT:    v_mul_hi_u32 v2, v13, v2
698; CGP-NEXT:    v_add_i32_e64 v3, s[4:5], v15, v3
699; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
700; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
701; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
702; CGP-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v16
703; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s[4:5]
704; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
705; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
706; CGP-NEXT:    v_add_i32_e64 v3, s[4:5], v13, v3
707; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v15
708; CGP-NEXT:    v_add_i32_e64 v3, s[4:5], v12, v3
709; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
710; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
711; CGP-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v12
712; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
713; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
714; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
715; CGP-NEXT:    v_mul_lo_u32 v2, v11, v0
716; CGP-NEXT:    v_mul_hi_u32 v3, v10, v0
717; CGP-NEXT:    v_mul_hi_u32 v0, v11, v0
718; CGP-NEXT:    v_mul_lo_u32 v12, v10, v1
719; CGP-NEXT:    v_mul_lo_u32 v13, v11, v1
720; CGP-NEXT:    v_mul_hi_u32 v14, v10, v1
721; CGP-NEXT:    v_mul_hi_u32 v1, v11, v1
722; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
723; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
724; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
725; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
726; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
727; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
728; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
729; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
730; CGP-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
731; CGP-NEXT:    v_add_i32_e32 v3, vcc, v13, v3
732; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
733; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
734; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
735; CGP-NEXT:    v_mul_lo_u32 v3, v4, v0
736; CGP-NEXT:    v_mul_lo_u32 v12, v5, v0
737; CGP-NEXT:    v_mul_hi_u32 v13, v4, v0
738; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
739; CGP-NEXT:    v_mul_lo_u32 v2, v4, v1
740; CGP-NEXT:    v_add_i32_e32 v14, vcc, 1, v0
741; CGP-NEXT:    v_addc_u32_e32 v15, vcc, 0, v1, vcc
742; CGP-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
743; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v14
744; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v15, vcc
745; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v13
746; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v10, v3
747; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v11, v2, vcc
748; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v11, v2
749; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v4
750; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
751; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
752; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
753; CGP-NEXT:    v_subb_u32_e32 v2, vcc, v2, v5, vcc
754; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v5
755; CGP-NEXT:    v_cndmask_b32_e32 v10, v13, v11, vcc
756; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v3, v4
757; CGP-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
758; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v4
759; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
760; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
761; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
762; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v5
763; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v3, vcc
764; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
765; CGP-NEXT:    v_cndmask_b32_e32 v2, v14, v12, vcc
766; CGP-NEXT:    v_cndmask_b32_e32 v3, v15, v16, vcc
767; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
768; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
769; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
770; CGP-NEXT:    ; implicit-def: $vgpr4
771; CGP-NEXT:    ; implicit-def: $vgpr10
772; CGP-NEXT:  BB2_2: ; %Flow2
773; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
774; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
775; CGP-NEXT:    s_cbranch_execz BB2_4
776; CGP-NEXT:  ; %bb.3:
777; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
778; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v4
779; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
780; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
781; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
782; CGP-NEXT:    v_mul_lo_u32 v1, v1, v0
783; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
784; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
785; CGP-NEXT:    v_mul_hi_u32 v0, v10, v0
786; CGP-NEXT:    v_mul_lo_u32 v1, v0, v4
787; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
788; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v10, v1
789; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
790; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
791; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v1, v4
792; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
793; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
794; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
795; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
796; CGP-NEXT:    v_mov_b32_e32 v1, 0
797; CGP-NEXT:  BB2_4:
798; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
799; CGP-NEXT:    v_or_b32_e32 v3, v9, v7
800; CGP-NEXT:    v_mov_b32_e32 v2, 0
801; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
802; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
803; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
804; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
805; CGP-NEXT:    s_cbranch_execz BB2_6
806; CGP-NEXT:  ; %bb.5:
807; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v6
808; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v7
809; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v6
810; CGP-NEXT:    v_subb_u32_e32 v5, vcc, 0, v7, vcc
811; CGP-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
812; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v2
813; CGP-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
814; CGP-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
815; CGP-NEXT:    v_trunc_f32_e32 v3, v3
816; CGP-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
817; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
818; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
819; CGP-NEXT:    v_mul_lo_u32 v10, v4, v3
820; CGP-NEXT:    v_mul_lo_u32 v11, v4, v2
821; CGP-NEXT:    v_mul_lo_u32 v12, v5, v2
822; CGP-NEXT:    v_mul_hi_u32 v13, v4, v2
823; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
824; CGP-NEXT:    v_mul_lo_u32 v12, v3, v11
825; CGP-NEXT:    v_mul_hi_u32 v14, v2, v11
826; CGP-NEXT:    v_mul_hi_u32 v11, v3, v11
827; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
828; CGP-NEXT:    v_mul_lo_u32 v13, v2, v10
829; CGP-NEXT:    v_mul_lo_u32 v15, v3, v10
830; CGP-NEXT:    v_mul_hi_u32 v16, v2, v10
831; CGP-NEXT:    v_mul_hi_u32 v10, v3, v10
832; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
833; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
834; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
835; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
836; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
837; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
838; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
839; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
840; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
841; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
842; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
843; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
844; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
845; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
846; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
847; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v3, v10, vcc
848; CGP-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v10
849; CGP-NEXT:    v_mul_lo_u32 v10, v4, v2
850; CGP-NEXT:    v_mul_lo_u32 v5, v5, v2
851; CGP-NEXT:    v_mul_hi_u32 v12, v4, v2
852; CGP-NEXT:    v_mul_lo_u32 v4, v4, v11
853; CGP-NEXT:    v_mul_lo_u32 v13, v11, v10
854; CGP-NEXT:    v_mul_hi_u32 v14, v2, v10
855; CGP-NEXT:    v_mul_hi_u32 v10, v11, v10
856; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v5, v4
857; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v12
858; CGP-NEXT:    v_mul_lo_u32 v5, v2, v4
859; CGP-NEXT:    v_mul_lo_u32 v12, v11, v4
860; CGP-NEXT:    v_mul_hi_u32 v15, v2, v4
861; CGP-NEXT:    v_mul_hi_u32 v4, v11, v4
862; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v13, v5
863; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
864; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v10
865; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
866; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v14
867; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]
868; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v15
869; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
870; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v11, v5
871; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v13
872; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v10, v5
873; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
874; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
875; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v10
876; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v4, vcc
877; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
878; CGP-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
879; CGP-NEXT:    v_mul_lo_u32 v4, v9, v2
880; CGP-NEXT:    v_mul_hi_u32 v5, v8, v2
881; CGP-NEXT:    v_mul_hi_u32 v2, v9, v2
882; CGP-NEXT:    v_mul_lo_u32 v10, v8, v3
883; CGP-NEXT:    v_mul_lo_u32 v11, v9, v3
884; CGP-NEXT:    v_mul_hi_u32 v12, v8, v3
885; CGP-NEXT:    v_mul_hi_u32 v3, v9, v3
886; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
887; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
888; CGP-NEXT:    v_add_i32_e32 v2, vcc, v11, v2
889; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
890; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
891; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
892; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
893; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
894; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
895; CGP-NEXT:    v_add_i32_e32 v5, vcc, v11, v5
896; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
897; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
898; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
899; CGP-NEXT:    v_mul_lo_u32 v5, v6, v2
900; CGP-NEXT:    v_mul_lo_u32 v10, v7, v2
901; CGP-NEXT:    v_mul_hi_u32 v11, v6, v2
902; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
903; CGP-NEXT:    v_mul_lo_u32 v4, v6, v3
904; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v2
905; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v3, vcc
906; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
907; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v12
908; CGP-NEXT:    v_addc_u32_e32 v14, vcc, 0, v13, vcc
909; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
910; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v8, v5
911; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v9, v4, vcc
912; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v9, v4
913; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v6
914; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
915; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v7
916; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
917; CGP-NEXT:    v_subb_u32_e32 v4, vcc, v4, v7, vcc
918; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v7
919; CGP-NEXT:    v_cndmask_b32_e32 v8, v11, v9, vcc
920; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v5, v6
921; CGP-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
922; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v6
923; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
924; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v7
925; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
926; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
927; CGP-NEXT:    v_cndmask_b32_e32 v4, v6, v5, vcc
928; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
929; CGP-NEXT:    v_cndmask_b32_e32 v4, v12, v10, vcc
930; CGP-NEXT:    v_cndmask_b32_e32 v5, v13, v14, vcc
931; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
932; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
933; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
934; CGP-NEXT:    ; implicit-def: $vgpr6
935; CGP-NEXT:    ; implicit-def: $vgpr8
936; CGP-NEXT:  BB2_6: ; %Flow
937; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
938; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
939; CGP-NEXT:    s_cbranch_execz BB2_8
940; CGP-NEXT:  ; %bb.7:
941; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v6
942; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v6
943; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v2
944; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
945; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
946; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
947; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
948; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
949; CGP-NEXT:    v_mul_hi_u32 v2, v8, v2
950; CGP-NEXT:    v_mul_lo_u32 v3, v2, v6
951; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
952; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v8, v3
953; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v6
954; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
955; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v3, v6
956; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
957; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
958; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v6
959; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
960; CGP-NEXT:    v_mov_b32_e32 v3, 0
961; CGP-NEXT:  BB2_8:
962; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
963; CGP-NEXT:    s_setpc_b64 s[30:31]
964  %result = udiv <2 x i64> %num, %den
965  ret <2 x i64> %result
966}
967
; Scalar i64 unsigned division by the constant 4096 (2^12).
; Both RUN configurations use the common check prefix for this function, i.e.
; they are expected to produce the same code: a single 64-bit logical right
; shift of v[0:1] by 12, with no division expansion.
968define i64 @v_udiv_i64_pow2k_denom(i64 %num) {
969; CHECK-LABEL: v_udiv_i64_pow2k_denom:
970; CHECK:       ; %bb.0:
971; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
972; CHECK-NEXT:    v_lshr_b64 v[0:1], v[0:1], 12
973; CHECK-NEXT:    s_setpc_b64 s[30:31]
974  %result = udiv i64 %num, 4096
975  ret i64 %result
976}
977
; <2 x i64> unsigned division by a splat of 4096 (2^12).
; The two RUN configurations are checked separately because their expected
; output differs:
;  - With the codegenprepare idiv expansion disabled, the shift amount is
;    expected in s4, produced by two scalar subtracts (63 - 11, then 64 minus
;    that result), and both lanes are shifted right by s4.
;    NOTE(review): this looks like an unfolded constant expression that
;    evaluates to 12 - confirm before relying on it.
;  - With the codegenprepare idiv expansion enabled, both lanes are expected to
;    be shifted right by the immediate 12.
978define <2 x i64> @v_udiv_v2i64_pow2k_denom(<2 x i64> %num) {
979; GISEL-LABEL: v_udiv_v2i64_pow2k_denom:
980; GISEL:       ; %bb.0:
981; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
982; GISEL-NEXT:    s_sub_u32 s4, 63, 11
983; GISEL-NEXT:    s_sub_u32 s4, 64, s4
984; GISEL-NEXT:    v_lshr_b64 v[0:1], v[0:1], s4
985; GISEL-NEXT:    v_lshr_b64 v[2:3], v[2:3], s4
986; GISEL-NEXT:    s_setpc_b64 s[30:31]
987;
988; CGP-LABEL: v_udiv_v2i64_pow2k_denom:
989; CGP:       ; %bb.0:
990; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
991; CGP-NEXT:    v_lshr_b64 v[0:1], v[0:1], 12
992; CGP-NEXT:    v_lshr_b64 v[2:3], v[2:3], 12
993; CGP-NEXT:    s_setpc_b64 s[30:31]
994  %result = udiv <2 x i64> %num, <i64 4096, i64 4096>
995  ret <2 x i64> %result
996}
997
; Scalar i64 unsigned division by the non-power-of-two constant 1235195.
; Both RUN configurations use the common check prefix for this function.
; The expected code contains no reciprocal/branching expansion: the numerator
; in v[0:1] is multiplied by the two 32-bit constants loaded into s4 and s5
; using 32-bit v_mul_lo_u32 / v_mul_hi_u32 partial products, the carries are
; collected with v_cndmask_b32 (0/1 selected on vcc), and the result is then
; shifted right by 20 with a 64-bit shift.
; NOTE(review): s5:s4 presumably holds the 64-bit "magic" reciprocal for
; 1235195 - confirm against the constant-division lowering.
998define i64 @v_udiv_i64_oddk_denom(i64 %num) {
999; CHECK-LABEL: v_udiv_i64_oddk_denom:
1000; CHECK:       ; %bb.0:
1001; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1002; CHECK-NEXT:    s_mov_b32 s4, 0x1fb03c31
1003; CHECK-NEXT:    s_mov_b32 s5, 0xd9528440
1004; CHECK-NEXT:    v_mul_lo_u32 v2, v1, s4
1005; CHECK-NEXT:    v_mul_lo_u32 v3, v0, s5
1006; CHECK-NEXT:    v_mul_hi_u32 v4, v0, s4
1007; CHECK-NEXT:    v_mul_lo_u32 v5, v1, s5
1008; CHECK-NEXT:    v_mul_hi_u32 v6, v1, s4
1009; CHECK-NEXT:    v_mul_hi_u32 v0, v0, s5
1010; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
1011; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
1012; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
1013; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1014; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1015; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1016; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
1017; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1018; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1019; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v4
1020; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1021; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1022; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1023; CHECK-NEXT:    v_mul_hi_u32 v1, v1, s5
1024; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
1025; CHECK-NEXT:    v_lshr_b64 v[0:1], v[0:1], 20
1026; CHECK-NEXT:    s_setpc_b64 s[30:31]
1027  %result = udiv i64 %num, 1235195
1028  ret i64 %result
1029}
1030
; <2 x i64> unsigned division by a splat of the non-power-of-two constant
; 1235195. Both RUN configurations use the common check prefix for this
; function.
; The expected code applies the same multiply-then-shift pattern to each lane:
; the lanes in v[0:1] and v[2:3] are each multiplied by the constants loaded
; into s4 and s5 via 32-bit v_mul_lo_u32 / v_mul_hi_u32 partial products, the
; carries are collected with v_cndmask_b32, and each lane is shifted right by
; 20. The instructions for the two lanes are interleaved in the expected
; output, and the constants are loaded once and shared by both lanes.
1031define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) {
1032; CHECK-LABEL: v_udiv_v2i64_oddk_denom:
1033; CHECK:       ; %bb.0:
1034; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1035; CHECK-NEXT:    s_mov_b32 s4, 0x1fb03c31
1036; CHECK-NEXT:    s_mov_b32 s5, 0xd9528440
1037; CHECK-NEXT:    v_mul_lo_u32 v4, v1, s4
1038; CHECK-NEXT:    v_mul_lo_u32 v5, v0, s5
1039; CHECK-NEXT:    v_mul_hi_u32 v6, v0, s4
1040; CHECK-NEXT:    v_mul_lo_u32 v7, v1, s5
1041; CHECK-NEXT:    v_mul_hi_u32 v8, v1, s4
1042; CHECK-NEXT:    v_mul_hi_u32 v0, v0, s5
1043; CHECK-NEXT:    v_mul_hi_u32 v1, v1, s5
1044; CHECK-NEXT:    v_mul_lo_u32 v9, v3, s4
1045; CHECK-NEXT:    v_mul_lo_u32 v10, v2, s5
1046; CHECK-NEXT:    v_mul_hi_u32 v11, v2, s4
1047; CHECK-NEXT:    v_mul_lo_u32 v12, v3, s5
1048; CHECK-NEXT:    v_mul_hi_u32 v13, v3, s4
1049; CHECK-NEXT:    v_mul_hi_u32 v2, v2, s5
1050; CHECK-NEXT:    v_mul_hi_u32 v3, v3, s5
1051; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
1052; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1053; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
1054; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1055; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
1056; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1057; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
1058; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1059; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
1060; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1061; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
1062; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1063; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v11
1064; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1065; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
1066; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1067; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
1068; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v6
1069; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v10, v7
1070; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v13, v9
1071; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
1072; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1073; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
1074; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1075; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
1076; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v6
1077; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
1078; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
1079; CHECK-NEXT:    v_lshr_b64 v[0:1], v[0:1], 20
1080; CHECK-NEXT:    v_lshr_b64 v[2:3], v[2:3], 20
1081; CHECK-NEXT:    s_setpc_b64 s[30:31]
1082  %result = udiv <2 x i64> %num, <i64 1235195, i64 1235195>
1083  ret <2 x i64> %result
1084}
1085
; Scalar i64 unsigned division where the denominator is (4096 << %y), i.e. a
; power of two that is not a compile-time constant. Both RUN configurations use
; the common check prefix for this function.
; The expected code does not turn the division into a shift. After computing
; the denominator with v_lshl_b64 into v[5:6], it emits a general division with
; two paths selected through the exec mask:
;  - The high dwords of numerator (v4) and denominator (v6) are OR-ed and the
;    result is compared against zero as the upper half of a 64-bit value.
;  - %bb.1 (some high bit set): full 64-bit sequence starting from a float
;    reciprocal estimate (v_rcp_iflag_f32), followed by integer refinement
;    steps and up to two quotient corrections.
;  - %bb.3 (both high dwords zero): shorter sequence operating on the low
;    dwords only (v3, v5); the high dword of the result is set to 0.
; The two paths rejoin at BB7_4, where exec is restored before returning.
1086define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
1087; CHECK-LABEL: v_udiv_i64_pow2_shl_denom:
1088; CHECK:       ; %bb.0:
1089; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1090; CHECK-NEXT:    v_mov_b32_e32 v3, v0
1091; CHECK-NEXT:    v_mov_b32_e32 v4, v1
1092; CHECK-NEXT:    s_mov_b64 s[4:5], 0x1000
1093; CHECK-NEXT:    v_lshl_b64 v[5:6], s[4:5], v2
1094; CHECK-NEXT:    v_or_b32_e32 v1, v4, v6
1095; CHECK-NEXT:    v_mov_b32_e32 v0, 0
1096; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
1097; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
1098; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1099; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
1100; CHECK-NEXT:    s_cbranch_execz BB7_2
1101; CHECK-NEXT:  ; %bb.1:
1102; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v5
1103; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v6
1104; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, 0, v5
1105; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v6, vcc
1106; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
1107; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1108; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
1109; CHECK-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
1110; CHECK-NEXT:    v_trunc_f32_e32 v1, v1
1111; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
1112; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
1113; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
1114; CHECK-NEXT:    v_mul_lo_u32 v8, v2, v1
1115; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v0
1116; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v0
1117; CHECK-NEXT:    v_mul_hi_u32 v11, v2, v0
1118; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
1119; CHECK-NEXT:    v_mul_lo_u32 v10, v1, v9
1120; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v9
1121; CHECK-NEXT:    v_mul_hi_u32 v9, v1, v9
1122; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
1123; CHECK-NEXT:    v_mul_lo_u32 v11, v0, v8
1124; CHECK-NEXT:    v_mul_lo_u32 v13, v1, v8
1125; CHECK-NEXT:    v_mul_hi_u32 v14, v0, v8
1126; CHECK-NEXT:    v_mul_hi_u32 v8, v1, v8
1127; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1128; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1129; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1130; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1131; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
1132; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1133; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
1134; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1135; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1136; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
1137; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
1138; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1139; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1140; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1141; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
1142; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v1, v8, vcc
1143; CHECK-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v8
1144; CHECK-NEXT:    v_mul_lo_u32 v8, v2, v0
1145; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v0
1146; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v0
1147; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v9
1148; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
1149; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v8
1150; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
1151; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v7, v2
1152; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v10
1153; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v2
1154; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v2
1155; CHECK-NEXT:    v_mul_hi_u32 v13, v0, v2
1156; CHECK-NEXT:    v_mul_hi_u32 v2, v9, v2
1157; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
1158; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
1159; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
1160; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
1161; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
1162; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
1163; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
1164; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1165; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
1166; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
1167; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
1168; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
1169; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
1170; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v8
1171; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
1172; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
1173; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1174; CHECK-NEXT:    v_mul_lo_u32 v2, v4, v0
1175; CHECK-NEXT:    v_mul_hi_u32 v7, v3, v0
1176; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
1177; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v1
1178; CHECK-NEXT:    v_mul_lo_u32 v9, v4, v1
1179; CHECK-NEXT:    v_mul_hi_u32 v10, v3, v1
1180; CHECK-NEXT:    v_mul_hi_u32 v1, v4, v1
1181; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
1182; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1183; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
1184; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1185; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
1186; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1187; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
1188; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1189; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
1190; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
1191; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1192; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1193; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
1194; CHECK-NEXT:    v_mul_lo_u32 v7, v5, v0
1195; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v0
1196; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v0
1197; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
1198; CHECK-NEXT:    v_mul_lo_u32 v2, v5, v1
1199; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v0
1200; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v1, vcc
1201; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
1202; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
1203; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
1204; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
1205; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v3, v7
1206; CHECK-NEXT:    v_subb_u32_e64 v7, s[4:5], v4, v2, vcc
1207; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], v4, v2
1208; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v5
1209; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1210; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v6
1211; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
1212; CHECK-NEXT:    v_subb_u32_e32 v2, vcc, v2, v6, vcc
1213; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v6
1214; CHECK-NEXT:    v_cndmask_b32_e32 v4, v9, v4, vcc
1215; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
1216; CHECK-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
1217; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v5
1218; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
1219; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
1220; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
1221; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v6
1222; CHECK-NEXT:    v_cndmask_b32_e32 v2, v5, v3, vcc
1223; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
1224; CHECK-NEXT:    v_cndmask_b32_e32 v2, v10, v8, vcc
1225; CHECK-NEXT:    v_cndmask_b32_e32 v3, v11, v12, vcc
1226; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
1227; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1228; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1229; CHECK-NEXT:    ; implicit-def: $vgpr5_vgpr6
1230; CHECK-NEXT:    ; implicit-def: $vgpr3
1231; CHECK-NEXT:  BB7_2: ; %Flow
1232; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
1233; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
1234; CHECK-NEXT:    s_cbranch_execz BB7_4
1235; CHECK-NEXT:  ; %bb.3:
1236; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v5
1237; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, 0, v5
1238; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1239; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1240; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
1241; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v0
1242; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
1243; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
1244; CHECK-NEXT:    v_mul_hi_u32 v0, v3, v0
1245; CHECK-NEXT:    v_mul_lo_u32 v1, v0, v5
1246; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
1247; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
1248; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
1249; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1250; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], v1, v5
1251; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1252; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
1253; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
1254; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1255; CHECK-NEXT:    v_mov_b32_e32 v1, 0
1256; CHECK-NEXT:  BB7_4:
1257; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
1258; CHECK-NEXT:    s_setpc_b64 s[30:31]
1259  %shl.y = shl i64 4096, %y
1260  %r = udiv i64 %x, %shl.y
1261  ret i64 %r
1262}
1263
1264define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
1265; GISEL-LABEL: v_udiv_v2i64_pow2_shl_denom:
1266; GISEL:       ; %bb.0:
1267; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1268; GISEL-NEXT:    s_mov_b64 s[4:5], 0x1000
1269; GISEL-NEXT:    v_lshl_b64 v[7:8], s[4:5], v4
1270; GISEL-NEXT:    v_lshl_b64 v[4:5], s[4:5], v6
1271; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v7
1272; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v8
1273; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v9
1274; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
1275; GISEL-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
1276; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v6
1277; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
1278; GISEL-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v9
1279; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
1280; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
1281; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v7
1282; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v8, vcc
1283; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v6
1284; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v6
1285; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
1286; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v6
1287; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1288; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1289; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
1290; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v13
1291; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v12
1292; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
1293; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1294; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
1295; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1296; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
1297; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
1298; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
1299; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v13
1300; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
1301; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1302; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
1303; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1304; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
1305; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
1306; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1307; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
1308; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
1309; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1310; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
1311; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v9, v13, vcc
1312; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
1313; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v6
1314; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v6
1315; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v12
1316; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v6
1317; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
1318; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
1319; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v13
1320; GISEL-NEXT:    v_mul_lo_u32 v14, v6, v10
1321; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v13
1322; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
1323; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1324; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
1325; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1326; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
1327; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v10
1328; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v13
1329; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v10
1330; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
1331; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1332; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
1333; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1334; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
1335; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
1336; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
1337; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
1338; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v10
1339; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
1340; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v11
1341; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
1342; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, 0, v9, s[4:5]
1343; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v6
1344; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
1345; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v6
1346; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1347; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1348; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
1349; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1350; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1351; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
1352; GISEL-NEXT:    v_mul_hi_u32 v6, v1, v6
1353; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
1354; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v11, v6
1355; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1356; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
1357; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1358; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1359; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
1360; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1361; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1362; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
1363; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
1364; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v6
1365; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v6
1366; GISEL-NEXT:    v_mul_lo_u32 v12, v7, v9
1367; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v6
1368; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1369; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
1370; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
1371; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], v1, v11, vcc
1372; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v11
1373; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v8
1374; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
1375; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v7
1376; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
1377; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v8
1378; GISEL-NEXT:    v_cndmask_b32_e64 v10, v11, v12, s[4:5]
1379; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v7
1380; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v8, vcc
1381; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
1382; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v6
1383; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v9, vcc
1384; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v8
1385; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
1386; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v7
1387; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
1388; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v8
1389; GISEL-NEXT:    v_cndmask_b32_e32 v0, v13, v0, vcc
1390; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v11
1391; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v12, vcc
1392; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
1393; GISEL-NEXT:    v_cndmask_b32_e32 v0, v11, v1, vcc
1394; GISEL-NEXT:    v_cndmask_b32_e32 v1, v12, v7, vcc
1395; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
1396; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
1397; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
1398; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v4
1399; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v5
1400; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v7
1401; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
1402; GISEL-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
1403; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v6
1404; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
1405; GISEL-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v7
1406; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
1407; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
1408; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v4
1409; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v5, vcc
1410; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v6
1411; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v6
1412; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v7
1413; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v6
1414; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1415; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
1416; GISEL-NEXT:    v_mul_lo_u32 v12, v7, v10
1417; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v11
1418; GISEL-NEXT:    v_mul_hi_u32 v14, v6, v10
1419; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
1420; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1421; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
1422; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1423; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
1424; GISEL-NEXT:    v_mul_lo_u32 v13, v7, v11
1425; GISEL-NEXT:    v_mul_hi_u32 v10, v7, v10
1426; GISEL-NEXT:    v_mul_hi_u32 v14, v6, v11
1427; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
1428; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1429; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
1430; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1431; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1432; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
1433; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1434; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
1435; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v11
1436; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1437; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
1438; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v7, v11, vcc
1439; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v11
1440; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v6
1441; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v6
1442; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v10
1443; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v6
1444; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
1445; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
1446; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v11
1447; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v8
1448; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v11
1449; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
1450; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1451; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
1452; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
1453; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
1454; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
1455; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v11
1456; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v8
1457; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
1458; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1459; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
1460; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
1461; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
1462; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
1463; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1464; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
1465; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
1466; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
1467; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v9
1468; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v8, vcc
1469; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, 0, v7, s[4:5]
1470; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v6
1471; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v7
1472; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v6
1473; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
1474; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1475; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1476; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1477; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1478; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v7
1479; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
1480; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v7
1481; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
1482; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1483; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
1484; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1485; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
1486; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1487; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1488; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1489; GISEL-NEXT:    v_mul_hi_u32 v7, v3, v7
1490; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
1491; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v6
1492; GISEL-NEXT:    v_mul_lo_u32 v9, v5, v6
1493; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v7
1494; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v6
1495; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
1496; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1497; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
1498; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v9, vcc
1499; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
1500; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v5
1501; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
1502; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
1503; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
1504; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v5
1505; GISEL-NEXT:    v_cndmask_b32_e64 v8, v9, v10, s[4:5]
1506; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v4
1507; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v5, vcc
1508; GISEL-NEXT:    v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5]
1509; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v6
1510; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v7, vcc
1511; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v5
1512; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
1513; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
1514; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
1515; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
1516; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
1517; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v9
1518; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v10, vcc
1519; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
1520; GISEL-NEXT:    v_cndmask_b32_e32 v2, v9, v3, vcc
1521; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v4, vcc
1522; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
1523; GISEL-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
1524; GISEL-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
1525; GISEL-NEXT:    s_setpc_b64 s[30:31]
1526;
1527; CGP-LABEL: v_udiv_v2i64_pow2_shl_denom:
1528; CGP:       ; %bb.0:
1529; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1530; CGP-NEXT:    v_mov_b32_e32 v8, v0
1531; CGP-NEXT:    v_mov_b32_e32 v9, v1
1532; CGP-NEXT:    v_mov_b32_e32 v5, v2
1533; CGP-NEXT:    v_mov_b32_e32 v7, v3
1534; CGP-NEXT:    s_mov_b64 s[4:5], 0x1000
1535; CGP-NEXT:    v_lshl_b64 v[2:3], s[4:5], v4
1536; CGP-NEXT:    v_lshl_b64 v[10:11], s[4:5], v6
1537; CGP-NEXT:    v_or_b32_e32 v1, v9, v3
1538; CGP-NEXT:    v_mov_b32_e32 v0, 0
1539; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
1540; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
1541; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1542; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
1543; CGP-NEXT:    s_cbranch_execz BB8_2
1544; CGP-NEXT:  ; %bb.1:
1545; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v2
1546; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v3
1547; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v2
1548; CGP-NEXT:    v_subb_u32_e32 v6, vcc, 0, v3, vcc
1549; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
1550; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1551; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
1552; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
1553; CGP-NEXT:    v_trunc_f32_e32 v1, v1
1554; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
1555; CGP-NEXT:    v_cvt_u32_f32_e32 v1, v1
1556; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
1557; CGP-NEXT:    v_mul_lo_u32 v12, v4, v1
1558; CGP-NEXT:    v_mul_lo_u32 v13, v4, v0
1559; CGP-NEXT:    v_mul_lo_u32 v14, v6, v0
1560; CGP-NEXT:    v_mul_hi_u32 v15, v4, v0
1561; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
1562; CGP-NEXT:    v_mul_lo_u32 v14, v1, v13
1563; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
1564; CGP-NEXT:    v_mul_hi_u32 v13, v1, v13
1565; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
1566; CGP-NEXT:    v_mul_lo_u32 v15, v0, v12
1567; CGP-NEXT:    v_mul_lo_u32 v17, v1, v12
1568; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
1569; CGP-NEXT:    v_mul_hi_u32 v12, v1, v12
1570; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
1571; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1572; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
1573; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1574; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
1575; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1576; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
1577; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1578; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
1579; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
1580; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1581; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1582; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
1583; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
1584; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
1585; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], v1, v12, vcc
1586; CGP-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
1587; CGP-NEXT:    v_mul_lo_u32 v12, v4, v0
1588; CGP-NEXT:    v_mul_lo_u32 v6, v6, v0
1589; CGP-NEXT:    v_mul_hi_u32 v14, v4, v0
1590; CGP-NEXT:    v_mul_lo_u32 v4, v4, v13
1591; CGP-NEXT:    v_mul_lo_u32 v15, v13, v12
1592; CGP-NEXT:    v_mul_hi_u32 v16, v0, v12
1593; CGP-NEXT:    v_mul_hi_u32 v12, v13, v12
1594; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v6, v4
1595; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v14
1596; CGP-NEXT:    v_mul_lo_u32 v6, v0, v4
1597; CGP-NEXT:    v_mul_lo_u32 v14, v13, v4
1598; CGP-NEXT:    v_mul_hi_u32 v17, v0, v4
1599; CGP-NEXT:    v_mul_hi_u32 v4, v13, v4
1600; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v15, v6
1601; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
1602; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
1603; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1604; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v16
1605; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
1606; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
1607; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1608; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v13, v6
1609; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v15
1610; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v12, v6
1611; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1612; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
1613; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v12
1614; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
1615; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
1616; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1617; CGP-NEXT:    v_mul_lo_u32 v4, v9, v0
1618; CGP-NEXT:    v_mul_hi_u32 v6, v8, v0
1619; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
1620; CGP-NEXT:    v_mul_lo_u32 v12, v8, v1
1621; CGP-NEXT:    v_mul_lo_u32 v13, v9, v1
1622; CGP-NEXT:    v_mul_hi_u32 v14, v8, v1
1623; CGP-NEXT:    v_mul_hi_u32 v1, v9, v1
1624; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
1625; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1626; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
1627; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1628; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
1629; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1630; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
1631; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1632; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
1633; CGP-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
1634; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
1635; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1636; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1637; CGP-NEXT:    v_mul_lo_u32 v6, v2, v0
1638; CGP-NEXT:    v_mul_lo_u32 v12, v3, v0
1639; CGP-NEXT:    v_mul_hi_u32 v13, v2, v0
1640; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
1641; CGP-NEXT:    v_mul_lo_u32 v4, v2, v1
1642; CGP-NEXT:    v_add_i32_e32 v14, vcc, 1, v0
1643; CGP-NEXT:    v_addc_u32_e32 v15, vcc, 0, v1, vcc
1644; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
1645; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v14
1646; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v15, vcc
1647; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
1648; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v8, v6
1649; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v9, v4, vcc
1650; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v9, v4
1651; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v2
1652; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
1653; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v3
1654; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
1655; CGP-NEXT:    v_subb_u32_e32 v4, vcc, v4, v3, vcc
1656; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v3
1657; CGP-NEXT:    v_cndmask_b32_e32 v8, v13, v9, vcc
1658; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v6, v2
1659; CGP-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
1660; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v6, v2
1661; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
1662; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v3
1663; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
1664; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v3
1665; CGP-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
1666; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
1667; CGP-NEXT:    v_cndmask_b32_e32 v2, v14, v12, vcc
1668; CGP-NEXT:    v_cndmask_b32_e32 v3, v15, v16, vcc
1669; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
1670; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1671; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1672; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
1673; CGP-NEXT:    ; implicit-def: $vgpr8
1674; CGP-NEXT:  BB8_2: ; %Flow2
1675; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
1676; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
1677; CGP-NEXT:    s_cbranch_execz BB8_4
1678; CGP-NEXT:  ; %bb.3:
1679; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v2
1680; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
1681; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1682; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1683; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
1684; CGP-NEXT:    v_mul_lo_u32 v1, v1, v0
1685; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
1686; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
1687; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
1688; CGP-NEXT:    v_mul_lo_u32 v1, v0, v2
1689; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
1690; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v8, v1
1691; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
1692; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1693; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v1, v2
1694; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1695; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
1696; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
1697; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1698; CGP-NEXT:    v_mov_b32_e32 v1, 0
1699; CGP-NEXT:  BB8_4:
1700; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
1701; CGP-NEXT:    v_or_b32_e32 v3, v7, v11
1702; CGP-NEXT:    v_mov_b32_e32 v2, 0
1703; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
1704; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
1705; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1706; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
1707; CGP-NEXT:    s_cbranch_execz BB8_6
1708; CGP-NEXT:  ; %bb.5:
1709; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v10
1710; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v11
1711; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v10
1712; CGP-NEXT:    v_subb_u32_e32 v6, vcc, 0, v11, vcc
1713; CGP-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
1714; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1715; CGP-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
1716; CGP-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
1717; CGP-NEXT:    v_trunc_f32_e32 v3, v3
1718; CGP-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
1719; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
1720; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
1721; CGP-NEXT:    v_mul_lo_u32 v8, v4, v3
1722; CGP-NEXT:    v_mul_lo_u32 v9, v4, v2
1723; CGP-NEXT:    v_mul_lo_u32 v12, v6, v2
1724; CGP-NEXT:    v_mul_hi_u32 v13, v4, v2
1725; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
1726; CGP-NEXT:    v_mul_lo_u32 v12, v3, v9
1727; CGP-NEXT:    v_mul_hi_u32 v14, v2, v9
1728; CGP-NEXT:    v_mul_hi_u32 v9, v3, v9
1729; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
1730; CGP-NEXT:    v_mul_lo_u32 v13, v2, v8
1731; CGP-NEXT:    v_mul_lo_u32 v15, v3, v8
1732; CGP-NEXT:    v_mul_hi_u32 v16, v2, v8
1733; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
1734; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
1735; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1736; CGP-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
1737; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1738; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
1739; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1740; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v16
1741; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1742; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
1743; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
1744; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
1745; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1746; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
1747; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1748; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
1749; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], v3, v8, vcc
1750; CGP-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v8
1751; CGP-NEXT:    v_mul_lo_u32 v8, v4, v2
1752; CGP-NEXT:    v_mul_lo_u32 v6, v6, v2
1753; CGP-NEXT:    v_mul_hi_u32 v12, v4, v2
1754; CGP-NEXT:    v_mul_lo_u32 v4, v4, v9
1755; CGP-NEXT:    v_mul_lo_u32 v13, v9, v8
1756; CGP-NEXT:    v_mul_hi_u32 v14, v2, v8
1757; CGP-NEXT:    v_mul_hi_u32 v8, v9, v8
1758; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v6, v4
1759; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v12
1760; CGP-NEXT:    v_mul_lo_u32 v6, v2, v4
1761; CGP-NEXT:    v_mul_lo_u32 v12, v9, v4
1762; CGP-NEXT:    v_mul_hi_u32 v15, v2, v4
1763; CGP-NEXT:    v_mul_hi_u32 v4, v9, v4
1764; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v13, v6
1765; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
1766; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v12, v8
1767; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1768; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v14
1769; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
1770; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v15
1771; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
1772; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v6
1773; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v13
1774; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v8, v6
1775; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
1776; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
1777; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v8
1778; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v4, vcc
1779; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
1780; CGP-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1781; CGP-NEXT:    v_mul_lo_u32 v4, v7, v2
1782; CGP-NEXT:    v_mul_hi_u32 v6, v5, v2
1783; CGP-NEXT:    v_mul_hi_u32 v2, v7, v2
1784; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
1785; CGP-NEXT:    v_mul_lo_u32 v9, v7, v3
1786; CGP-NEXT:    v_mul_hi_u32 v12, v5, v3
1787; CGP-NEXT:    v_mul_hi_u32 v3, v7, v3
1788; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1789; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1790; CGP-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
1791; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1792; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
1793; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1794; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
1795; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1796; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
1797; CGP-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
1798; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1799; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1800; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1801; CGP-NEXT:    v_mul_lo_u32 v6, v10, v2
1802; CGP-NEXT:    v_mul_lo_u32 v8, v11, v2
1803; CGP-NEXT:    v_mul_hi_u32 v9, v10, v2
1804; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
1805; CGP-NEXT:    v_mul_lo_u32 v4, v10, v3
1806; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v2
1807; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v3, vcc
1808; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
1809; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v12
1810; CGP-NEXT:    v_addc_u32_e32 v14, vcc, 0, v13, vcc
1811; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1812; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v5, v6
1813; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v7, v4, vcc
1814; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v7, v4
1815; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v10
1816; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
1817; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v11
1818; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
1819; CGP-NEXT:    v_subb_u32_e32 v4, vcc, v4, v11, vcc
1820; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v11
1821; CGP-NEXT:    v_cndmask_b32_e32 v6, v9, v7, vcc
1822; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v5, v10
1823; CGP-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
1824; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v10
1825; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
1826; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v11
1827; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
1828; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v11
1829; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v5, vcc
1830; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
1831; CGP-NEXT:    v_cndmask_b32_e32 v4, v12, v8, vcc
1832; CGP-NEXT:    v_cndmask_b32_e32 v5, v13, v14, vcc
1833; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
1834; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1835; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
1836; CGP-NEXT:    ; implicit-def: $vgpr10_vgpr11
1837; CGP-NEXT:    ; implicit-def: $vgpr5
1838; CGP-NEXT:  BB8_6: ; %Flow
1839; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
1840; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
1841; CGP-NEXT:    s_cbranch_execz BB8_8
1842; CGP-NEXT:  ; %bb.7:
1843; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v10
1844; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v10
1845; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1846; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
1847; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
1848; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
1849; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
1850; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
1851; CGP-NEXT:    v_mul_hi_u32 v2, v5, v2
1852; CGP-NEXT:    v_mul_lo_u32 v3, v2, v10
1853; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
1854; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v5, v3
1855; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v10
1856; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1857; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v3, v10
1858; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1859; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
1860; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v10
1861; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1862; CGP-NEXT:    v_mov_b32_e32 v3, 0
1863; CGP-NEXT:  BB8_8:
1864; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
1865; CGP-NEXT:    s_setpc_b64 s[30:31]
1866  %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
1867  %r = udiv <2 x i64> %x, %shl.y
1868  ret <2 x i64> %r
1869}
1870
; v_udiv_i64_24bit: unsigned 64-bit division where both operands are first
; masked with 16777215 (0xffffff), so only their low 24 bits take part in the
; udiv.
;
; The two RUN configurations expect different code for this input:
;   * GISEL (-amdgpu-codegenprepare-disable-idiv-expansion=1): a 32-bit integer
;     sequence built from v_rcp_iflag_f32, v_mul_lo_u32/v_mul_hi_u32 and two
;     compare/select correction steps.
;   * CGP (-amdgpu-codegenprepare-disable-idiv-expansion=0): a float sequence
;     (v_cvt_f32_u32, v_rcp_f32, v_trunc_f32, v_mad_f32) whose result is masked
;     with 0xffffff again.
; Both expected outputs use only 32-bit instructions and end by writing 0 to v1.
;
; The CHECK lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
1871define i64 @v_udiv_i64_24bit(i64 %num, i64 %den) {
1872; GISEL-LABEL: v_udiv_i64_24bit:
1873; GISEL:       ; %bb.0:
1874; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1875; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
1876; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
1877; GISEL-NEXT:    v_and_b32_e32 v1, s4, v2
1878; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
1879; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
1880; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1881; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
1882; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
1883; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
1884; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
1885; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
1886; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
1887; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
1888; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
1889; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
1890; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
1891; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1892; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
1893; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1894; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
1895; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
1896; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
1897; GISEL-NEXT:    v_mov_b32_e32 v1, 0
1898; GISEL-NEXT:    s_setpc_b64 s[30:31]
1899;
1900; CGP-LABEL: v_udiv_i64_24bit:
1901; CGP:       ; %bb.0:
1902; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1903; CGP-NEXT:    s_mov_b32 s4, 0xffffff
1904; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
1905; CGP-NEXT:    v_and_b32_e32 v1, s4, v2
1906; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v0
1907; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v1
1908; CGP-NEXT:    v_rcp_f32_e32 v2, v1
1909; CGP-NEXT:    v_mul_f32_e32 v2, v0, v2
1910; CGP-NEXT:    v_trunc_f32_e32 v2, v2
1911; CGP-NEXT:    v_mad_f32 v0, -v2, v1, v0
1912; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
1913; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v0|, v1
1914; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1915; CGP-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
1916; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1917; CGP-NEXT:    v_mov_b32_e32 v1, 0
1918; CGP-NEXT:    s_setpc_b64 s[30:31]
  ; 16777215 == 0xffffff: both masks clear every bit above bit 23, so the
  ; udiv operands are known to fit in 24 bits.
1919  %num.mask = and i64 %num, 16777215
1920  %den.mask = and i64 %den, 16777215
1921  %result = udiv i64 %num.mask, %den.mask
1922  ret i64 %result
1923}
1924
1925define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
1926; GISEL-LABEL: v_udiv_v2i64_24bit:
1927; GISEL:       ; %bb.0:
1928; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1929; GISEL-NEXT:    s_mov_b32 s6, 0xffffff
1930; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v7, 0
1931; GISEL-NEXT:    v_and_b32_e32 v1, s6, v4
1932; GISEL-NEXT:    v_and_b32_e32 v3, s6, v6
1933; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v1
1934; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
1935; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], 0, 0, vcc
1936; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
1937; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
1938; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
1939; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v7
1940; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v7
1941; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
1942; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v8
1943; GISEL-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
1944; GISEL-NEXT:    v_mul_f32_e32 v7, 0x5f7ffffc, v7
1945; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v6
1946; GISEL-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v7
1947; GISEL-NEXT:    v_trunc_f32_e32 v8, v8
1948; GISEL-NEXT:    v_trunc_f32_e32 v11, v11
1949; GISEL-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v8
1950; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
1951; GISEL-NEXT:    v_mac_f32_e32 v7, 0xcf800000, v11
1952; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
1953; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
1954; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
1955; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
1956; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v11
1957; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v6
1958; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v6
1959; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v6
1960; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v7
1961; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v7
1962; GISEL-NEXT:    v_mul_hi_u32 v19, v9, v7
1963; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
1964; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v13
1965; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v17
1966; GISEL-NEXT:    v_mul_hi_u32 v18, v7, v17
1967; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v19
1968; GISEL-NEXT:    v_mul_lo_u32 v19, v7, v13
1969; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v19
1970; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
1971; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
1972; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v14
1973; GISEL-NEXT:    v_mul_hi_u32 v18, v6, v14
1974; GISEL-NEXT:    v_mul_hi_u32 v14, v8, v14
1975; GISEL-NEXT:    v_mul_hi_u32 v17, v11, v17
1976; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v16
1977; GISEL-NEXT:    v_mul_lo_u32 v16, v6, v12
1978; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v16
1979; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1980; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
1981; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v12
1982; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
1983; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v18
1984; GISEL-NEXT:    v_mul_hi_u32 v18, v6, v12
1985; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
1986; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1987; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v18
1988; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
1989; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
1990; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1991; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
1992; GISEL-NEXT:    v_mul_lo_u32 v19, v11, v13
1993; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
1994; GISEL-NEXT:    v_mul_hi_u32 v19, v7, v13
1995; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
1996; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
1997; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
1998; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v20, v19
1999; GISEL-NEXT:    s_bfe_i32 s10, -1, 0x10000
2000; GISEL-NEXT:    s_bfe_i32 s11, -1, 0x10000
2001; GISEL-NEXT:    s_bfe_i32 s12, -1, 0x10000
2002; GISEL-NEXT:    s_bfe_i32 s13, -1, 0x10000
2003; GISEL-NEXT:    v_and_b32_e32 v0, s6, v0
2004; GISEL-NEXT:    v_and_b32_e32 v2, s6, v2
2005; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v12
2006; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
2007; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
2008; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
2009; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
2010; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
2011; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
2012; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v18
2013; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
2014; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
2015; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
2016; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v8, v12, vcc
2017; GISEL-NEXT:    v_mul_lo_u32 v15, v4, v6
2018; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v6
2019; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v17
2020; GISEL-NEXT:    v_addc_u32_e64 v16, s[6:7], v11, v13, s[4:5]
2021; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v7
2022; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v7
2023; GISEL-NEXT:    v_mul_hi_u32 v18, v9, v7
2024; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v16
2025; GISEL-NEXT:    v_mul_lo_u32 v19, v16, v17
2026; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v10, v9
2027; GISEL-NEXT:    v_mul_hi_u32 v10, v7, v17
2028; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v18
2029; GISEL-NEXT:    v_mul_lo_u32 v18, v7, v9
2030; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], v19, v18
2031; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
2032; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v18, v10
2033; GISEL-NEXT:    v_mul_hi_u32 v10, v4, v6
2034; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v14
2035; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v15
2036; GISEL-NEXT:    v_add_i32_e64 v4, s[8:9], v5, v4
2037; GISEL-NEXT:    v_mul_hi_u32 v5, v6, v15
2038; GISEL-NEXT:    v_add_i32_e64 v4, s[8:9], v4, v10
2039; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v4
2040; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], v18, v10
2041; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
2042; GISEL-NEXT:    v_add_i32_e64 v5, s[8:9], v10, v5
2043; GISEL-NEXT:    v_mov_b32_e32 v5, s10
2044; GISEL-NEXT:    v_mov_b32_e32 v10, s11
2045; GISEL-NEXT:    v_add_i32_e64 v8, s[10:11], v8, v12
2046; GISEL-NEXT:    v_mov_b32_e32 v12, s12
2047; GISEL-NEXT:    v_add_i32_e64 v11, s[10:11], v11, v13
2048; GISEL-NEXT:    v_mul_hi_u32 v13, v14, v15
2049; GISEL-NEXT:    v_mul_hi_u32 v15, v16, v17
2050; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
2051; GISEL-NEXT:    v_add_i32_e64 v17, s[8:9], v18, v17
2052; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v4
2053; GISEL-NEXT:    v_mul_hi_u32 v14, v14, v4
2054; GISEL-NEXT:    v_mul_hi_u32 v4, v6, v4
2055; GISEL-NEXT:    v_add_i32_e64 v13, s[8:9], v18, v13
2056; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
2057; GISEL-NEXT:    v_add_i32_e64 v4, s[8:9], v13, v4
2058; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[8:9]
2059; GISEL-NEXT:    v_add_i32_e64 v13, s[8:9], v18, v13
2060; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
2061; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], v19, v18
2062; GISEL-NEXT:    v_mul_lo_u32 v19, v16, v9
2063; GISEL-NEXT:    v_mul_hi_u32 v16, v16, v9
2064; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
2065; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v19, v15
2066; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
2067; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v15, v9
2068; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
2069; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v19, v15
2070; GISEL-NEXT:    v_mov_b32_e32 v19, s13
2071; GISEL-NEXT:    v_add_i32_e64 v4, s[6:7], v4, v17
2072; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[6:7]
2073; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v18
2074; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
2075; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v13, v17
2076; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
2077; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v14, v13
2078; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v16, v15
2079; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v13, vcc
2080; GISEL-NEXT:    v_addc_u32_e64 v11, vcc, v11, v14, s[4:5]
2081; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
2082; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v8, vcc
2083; GISEL-NEXT:    v_mul_lo_u32 v8, 0, v4
2084; GISEL-NEXT:    v_mul_hi_u32 v13, v0, v4
2085; GISEL-NEXT:    v_mul_hi_u32 v4, 0, v4
2086; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
2087; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v11, vcc
2088; GISEL-NEXT:    v_mul_lo_u32 v11, 0, v7
2089; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v7
2090; GISEL-NEXT:    v_mul_hi_u32 v7, 0, v7
2091; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v6
2092; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v6
2093; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v6
2094; GISEL-NEXT:    v_mul_hi_u32 v6, 0, v6
2095; GISEL-NEXT:    v_mul_lo_u32 v18, v2, v9
2096; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
2097; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
2098; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v14
2099; GISEL-NEXT:    v_mul_lo_u32 v11, 0, v9
2100; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v9
2101; GISEL-NEXT:    v_mul_hi_u32 v9, 0, v9
2102; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v15
2103; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2104; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v16, v4
2105; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
2106; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
2107; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2108; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
2109; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
2110; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v17
2111; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2112; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
2113; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
2114; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2115; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v15, v8
2116; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
2117; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v18, v17
2118; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v14
2119; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
2120; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2121; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v15
2122; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2123; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v13, v8
2124; GISEL-NEXT:    v_mul_lo_u32 v13, v1, v4
2125; GISEL-NEXT:    v_mul_lo_u32 v15, 0, v4
2126; GISEL-NEXT:    v_mul_hi_u32 v16, v1, v4
2127; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v14
2128; GISEL-NEXT:    v_mul_lo_u32 v14, v3, v7
2129; GISEL-NEXT:    v_mul_lo_u32 v17, 0, v7
2130; GISEL-NEXT:    v_mul_hi_u32 v18, v3, v7
2131; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
2132; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v11
2133; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v6
2134; GISEL-NEXT:    v_mul_lo_u32 v11, v3, v8
2135; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
2136; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v17, v11
2137; GISEL-NEXT:    v_add_i32_e32 v15, vcc, 1, v4
2138; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, 0, v6, vcc
2139; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v16
2140; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
2141; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v13
2142; GISEL-NEXT:    v_subb_u32_e64 v13, s[4:5], 0, v9, vcc
2143; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v1
2144; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[4:5]
2145; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
2146; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], 1, v7
2147; GISEL-NEXT:    v_addc_u32_e64 v18, s[6:7], 0, v8, s[6:7]
2148; GISEL-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v14
2149; GISEL-NEXT:    v_subb_u32_e64 v14, s[8:9], 0, v11, s[6:7]
2150; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v16, s[4:5]
2151; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v3
2152; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[4:5]
2153; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
2154; GISEL-NEXT:    v_cndmask_b32_e64 v12, v12, v16, s[4:5]
2155; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], 1, v15
2156; GISEL-NEXT:    v_addc_u32_e64 v16, s[4:5], 0, v17, s[4:5]
2157; GISEL-NEXT:    v_sub_i32_e64 v11, s[4:5], 0, v11
2158; GISEL-NEXT:    v_subbrev_u32_e64 v11, s[4:5], 0, v11, s[6:7]
2159; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v3
2160; GISEL-NEXT:    v_subbrev_u32_e64 v11, s[4:5], 0, v11, s[4:5]
2161; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v3
2162; GISEL-NEXT:    v_add_i32_e64 v2, s[6:7], 1, v13
2163; GISEL-NEXT:    v_addc_u32_e64 v3, s[6:7], 0, v18, s[6:7]
2164; GISEL-NEXT:    v_sub_i32_e64 v9, s[6:7], 0, v9
2165; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
2166; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
2167; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
2168; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
2169; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
2170; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
2171; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
2172; GISEL-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
2173; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v11
2174; GISEL-NEXT:    v_cndmask_b32_e32 v1, v19, v1, vcc
2175; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
2176; GISEL-NEXT:    v_cndmask_b32_e32 v0, v15, v14, vcc
2177; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v1
2178; GISEL-NEXT:    v_cndmask_b32_e64 v1, v13, v2, s[4:5]
2179; GISEL-NEXT:    v_cndmask_b32_e32 v9, v17, v16, vcc
2180; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
2181; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
2182; GISEL-NEXT:    v_cndmask_b32_e64 v3, v18, v3, s[4:5]
2183; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v12
2184; GISEL-NEXT:    v_cndmask_b32_e64 v2, v7, v1, s[4:5]
2185; GISEL-NEXT:    v_cndmask_b32_e32 v1, v6, v9, vcc
2186; GISEL-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
2187; GISEL-NEXT:    s_setpc_b64 s[30:31]
2188;
2189; CGP-LABEL: v_udiv_v2i64_24bit:
2190; CGP:       ; %bb.0:
2191; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2192; CGP-NEXT:    s_mov_b32 s6, 0xffffff
2193; CGP-NEXT:    v_and_b32_e32 v0, s6, v0
2194; CGP-NEXT:    v_and_b32_e32 v1, s6, v2
2195; CGP-NEXT:    v_and_b32_e32 v2, s6, v4
2196; CGP-NEXT:    v_and_b32_e32 v3, s6, v6
2197; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v0
2198; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v2
2199; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v1
2200; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v3
2201; CGP-NEXT:    v_rcp_f32_e32 v4, v2
2202; CGP-NEXT:    v_rcp_f32_e32 v5, v3
2203; CGP-NEXT:    v_mul_f32_e32 v4, v0, v4
2204; CGP-NEXT:    v_mul_f32_e32 v5, v1, v5
2205; CGP-NEXT:    v_trunc_f32_e32 v4, v4
2206; CGP-NEXT:    v_trunc_f32_e32 v5, v5
2207; CGP-NEXT:    v_mad_f32 v0, -v4, v2, v0
2208; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
2209; CGP-NEXT:    v_mad_f32 v1, -v5, v3, v1
2210; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
2211; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v0|, v2
2212; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2213; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v1|, v3
2214; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
2215; CGP-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
2216; CGP-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
2217; CGP-NEXT:    v_and_b32_e32 v0, s6, v0
2218; CGP-NEXT:    v_and_b32_e32 v2, s6, v1
2219; CGP-NEXT:    v_mov_b32_e32 v1, 0
2220; CGP-NEXT:    v_mov_b32_e32 v3, 0
2221; CGP-NEXT:    s_setpc_b64 s[30:31]
2222  %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
2223  %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
2224  %result = udiv <2 x i64> %num.mask, %den.mask
2225  ret <2 x i64> %result
2226}
2227