; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s

; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.

; v_udiv_i64: unsigned 64-bit division of two plain (non-inreg) i64 arguments.
; Both RUN lines use the common check prefix for this function, i.e. the
; expected output is the same whether the idiv expansion in
; AMDGPUCodeGenPrepare is disabled (=1) or enabled (=0).
; Shape of the expected code:
;   %bb.0  ORs v1 with v3 (presumably the high dwords of %num and %den) and
;          tests whether the 64-bit value {hi = v1|v3, lo = 0} is non-zero.
;   %bb.1  lanes where it is non-zero: the long multi-dword expansion, result
;          in v[4:5].
;   %bb.3  remaining lanes: a short expansion that reads only v0 and v2 and
;          sets the high result dword (v5) to 0.
; The result is copied from v[4:5] to v[0:1] before returning.
define i64 @v_udiv_i64(i64 %num, i64 %den) {
; CHECK-LABEL: v_udiv_i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_or_b32_e32 v5, v1, v3
; CHECK-NEXT:    v_mov_b32_e32 v4, 0
; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execz BB0_2
; CHECK-NEXT:  ; %bb.1:
; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v3, vcc
; CHECK-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
; CHECK-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
; CHECK-NEXT:    v_trunc_f32_e32 v5, v5
; CHECK-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v5
; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v4
; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v4
; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v4
; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
; CHECK-NEXT:    v_mul_lo_u32 v10, v5, v9
; CHECK-NEXT:    v_mul_hi_u32 v12, v4, v9
; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v9
; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
; CHECK-NEXT:    v_mul_lo_u32 v11, v4, v8
; CHECK-NEXT:    v_mul_lo_u32 v13, v5, v8
; CHECK-NEXT:    v_mul_hi_u32 v14, v4, v8
; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v8
; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v5, v8, vcc
; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v8
; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v4
; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v4
; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v4
; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v9
; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
; CHECK-NEXT:    v_mul_hi_u32 v12, v4, v8
; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v6
; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v6
; CHECK-NEXT:    v_mul_hi_u32 v13, v4, v6
; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v6
; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v4
; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v4
; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v5
; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v5
; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v5
; CHECK-NEXT:    v_mul_hi_u32 v5, v1, v5
; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v4
; CHECK-NEXT:    v_mul_hi_u32 v9, v2, v4
; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v5
; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v5, vcc
; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
; CHECK-NEXT:    v_subb_u32_e64 v7, s[4:5], v1, v6, vcc
; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v6
; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v2
; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v3
; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v3
; CHECK-NEXT:    v_cndmask_b32_e32 v6, v9, v6, vcc
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v10, v8, vcc
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v11, v12, vcc
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
; CHECK-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
; CHECK-NEXT:    ; implicit-def: $vgpr2
; CHECK-NEXT:    ; implicit-def: $vgpr0
; CHECK-NEXT:  BB0_2: ; %Flow
; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
; CHECK-NEXT:    s_cbranch_execz BB0_4
; CHECK-NEXT:  ; %bb.3:
; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v2
; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v1
; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT:    v_mul_lo_u32 v3, v1, v2
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v1
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v4, v1, v3, vcc
; CHECK-NEXT:    v_mov_b32_e32 v5, 0
; CHECK-NEXT:  BB0_4:
; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT:    v_mov_b32_e32 v0, v4
; CHECK-NEXT:    v_mov_b32_e32 v1, v5
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %result = udiv i64 %num, %den
  ret i64 %result
}

; FIXME: This is a workaround for not handling uniform VGPR case.
declare i32 @llvm.amdgcn.readfirstlane(i32)

; s_udiv_i64: the same udiv with inreg arguments in an amdgpu_ps function.
; The expected code reads the operands from s[0:1] and s[2:3] and branches
; with scalar branches (s_cbranch_vccz / s_cbranch_scc1) instead of exec
; masking. The quotient is split into dwords and passed through
; llvm.amdgcn.readfirstlane before being returned (see the FIXME above).
; As in v_udiv_i64, both RUN lines use the common check prefix here.
define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-LABEL: s_udiv_i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_or_b64 s[6:7], s[0:1], s[2:3]
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_mov_b32 s5, -1
; CHECK-NEXT:    s_and_b64 s[6:7], s[6:7], s[4:5]
; CHECK-NEXT:    v_cmp_ne_u64_e64 vcc, s[6:7], 0
; CHECK-NEXT:    s_cbranch_vccz BB1_2
; CHECK-NEXT:  ; %bb.1:
; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
; CHECK-NEXT:    v_mov_b32_e32 v1, s3
; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s3
; CHECK-NEXT:    s_sub_u32 s6, 0, s2
; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
; CHECK-NEXT:    v_mov_b32_e32 v3, s1
; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v2
; CHECK-NEXT:    s_and_b32 s4, s4, 1
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
; CHECK-NEXT:    s_subb_u32 s7, 0, s3
; CHECK-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v0
; CHECK-NEXT:    v_trunc_f32_e32 v2, v2
; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v2
; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v0
; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v5
; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v5
; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v5
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v4
; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v4
; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
; CHECK-NEXT:    v_mul_hi_u32 v4, v2, v4
; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v2, v4, vcc
; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v4
; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v0
; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v6
; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
; CHECK-NEXT:    v_mul_hi_u32 v11, v0, v6
; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
; CHECK-NEXT:    v_mul_lo_u32 v4, s1, v0
; CHECK-NEXT:    v_mul_hi_u32 v5, s0, v0
; CHECK-NEXT:    v_mul_hi_u32 v0, s1, v0
; CHECK-NEXT:    v_mul_lo_u32 v6, s0, v2
; CHECK-NEXT:    v_mul_lo_u32 v7, s1, v2
; CHECK-NEXT:    v_mul_hi_u32 v8, s0, v2
; CHECK-NEXT:    v_mul_hi_u32 v2, s1, v2
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
; CHECK-NEXT:    v_mul_lo_u32 v5, s2, v0
; CHECK-NEXT:    v_mul_lo_u32 v6, s3, v0
; CHECK-NEXT:    v_mul_hi_u32 v7, s2, v0
; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v0
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v8
; CHECK-NEXT:    v_mul_lo_u32 v2, s2, v2
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, s0, v5
; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v2, vcc
; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], s1, v2
; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v5
; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s3, v3
; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v2, v1, vcc
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v3
; CHECK-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s2, v5
; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v3
; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s3, v1
; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v4, vcc
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT:    s_mov_b32 s5, 0
; CHECK-NEXT:    s_branch BB1_3
; CHECK-NEXT:  BB1_2:
; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT:  BB1_3: ; %Flow
; CHECK-NEXT:    s_xor_b32 s1, s5, -1
; CHECK-NEXT:    s_and_b32 s1, s1, 1
; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
; CHECK-NEXT:    s_cbranch_scc1 BB1_5
; CHECK-NEXT:  ; %bb.4:
; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
; CHECK-NEXT:    s_sub_i32 s1, 0, s2
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
; CHECK-NEXT:    v_mul_lo_u32 v1, s1, v0
; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT:    v_mul_hi_u32 v0, s0, v0
; CHECK-NEXT:    v_mul_lo_u32 v1, v0, s2
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_subrev_i32_e64 v2, s[0:1], s2, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:  BB1_5:
; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
; CHECK-NEXT:    s_mov_b32 s1, s0
; CHECK-NEXT:    ; return to shader part epilog
  %result = udiv i64 %num, %den
  ; Split the quotient into dwords and pass each through readfirstlane.
  %cast = bitcast i64 %result to <2 x i32>
  %elt.0 = extractelement <2 x i32> %cast, i32 0
  %elt.1 = extractelement <2 x i32> %cast, i32 1
  %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
  %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
  ; NOTE(review): element 1 is filled with %res.0, not %res.1, so %res.1 is
  ; unused and the function returns the low dword in both halves (this matches
  ; the expected "s_mov_b32 s1, s0" above). The high dword of the quotient is
  ; therefore not covered by this test. If %res.1 was intended, change the
  ; operand and regenerate the assertions with utils/update_llc_test_checks.py
  ; rather than editing them by hand.
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
  %cast.back = bitcast <2 x i32> %ins.1 to i64
  ret i64 %cast.back
}

368define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { 369; GISEL-LABEL: v_udiv_v2i64: 370; GISEL: ; %bb.0: 371; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 372; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 373; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5 374; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 375; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 376; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 377; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 378; GISEL-NEXT: v_trunc_f32_e32 v9, v9 379; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9 380; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 381; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 382; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 383; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc 384; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 385; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8 386; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 387; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 388; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 389; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 390; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 391; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13 392; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12 393; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 394; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 395; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 396; 
GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 397; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 398; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 399; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 400; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 401; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 402; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 403; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 404; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 405; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 406; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 407; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 408; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 409; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 410; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 411; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 412; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v13, vcc 413; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 414; GISEL-NEXT: v_mul_lo_u32 v13, v10, v8 415; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8 416; GISEL-NEXT: v_mul_lo_u32 v14, v10, v12 417; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8 418; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 419; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 420; GISEL-NEXT: v_mul_lo_u32 v11, v12, v13 421; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 422; GISEL-NEXT: v_mul_hi_u32 v15, v8, v13 423; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 424; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 425; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 426; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 427; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 428; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 429; GISEL-NEXT: v_mul_hi_u32 v13, v12, v13 430; GISEL-NEXT: v_mul_hi_u32 v15, v8, v10 431; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 432; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 433; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 434; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 435; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 436; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 
437; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 438; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 439; GISEL-NEXT: v_mul_hi_u32 v10, v12, v10 440; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 441; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 442; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc 443; GISEL-NEXT: v_addc_u32_e64 v9, vcc, 0, v9, s[4:5] 444; GISEL-NEXT: v_mul_lo_u32 v10, v1, v8 445; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9 446; GISEL-NEXT: v_mul_hi_u32 v12, v0, v8 447; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 448; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 449; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 450; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 451; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 452; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 453; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 454; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 455; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 456; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 457; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 458; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 459; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 460; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 461; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 462; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 463; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 464; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 465; GISEL-NEXT: v_mul_lo_u32 v10, v4, v8 466; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8 467; GISEL-NEXT: v_mul_lo_u32 v12, v4, v9 468; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 469; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 470; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 471; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 472; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v11, vcc 473; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v11 474; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 475; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] 476; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 477; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] 478; GISEL-NEXT: 
v_cmp_eq_u32_e64 s[4:5], v10, v5 479; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, v12, s[4:5] 480; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v4 481; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc 482; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] 483; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v8 484; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc 485; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 486; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc 487; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 488; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 489; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5 490; GISEL-NEXT: v_cndmask_b32_e32 v0, v13, v0, vcc 491; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v11 492; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v12, vcc 493; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 494; GISEL-NEXT: v_cndmask_b32_e32 v0, v11, v1, vcc 495; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc 496; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 497; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc 498; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc 499; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v6 500; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v7 501; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 502; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 503; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 504; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 505; GISEL-NEXT: v_trunc_f32_e32 v5, v5 506; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 507; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 508; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 509; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 510; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc 511; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 512; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 513; GISEL-NEXT: v_mul_lo_u32 v12, v8, v5 514; GISEL-NEXT: v_mul_hi_u32 v13, v8, v4 515; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 516; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 517; GISEL-NEXT: v_mul_lo_u32 v12, v5, v10 518; GISEL-NEXT: v_mul_lo_u32 v13, v4, v11 519; GISEL-NEXT: v_mul_hi_u32 v14, v4, v10 520; 
GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 521; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 522; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 523; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 524; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 525; GISEL-NEXT: v_mul_lo_u32 v13, v5, v11 526; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10 527; GISEL-NEXT: v_mul_hi_u32 v14, v4, v11 528; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 529; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 530; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 531; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 532; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 533; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 534; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 535; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 536; GISEL-NEXT: v_mul_hi_u32 v11, v5, v11 537; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 538; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 539; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v5, v11, vcc 540; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v11 541; GISEL-NEXT: v_mul_lo_u32 v11, v8, v4 542; GISEL-NEXT: v_mul_lo_u32 v9, v9, v4 543; GISEL-NEXT: v_mul_lo_u32 v12, v8, v10 544; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 545; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 546; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 547; GISEL-NEXT: v_mul_lo_u32 v9, v10, v11 548; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 549; GISEL-NEXT: v_mul_hi_u32 v13, v4, v11 550; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 551; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 552; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 553; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 554; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 555; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 556; GISEL-NEXT: v_mul_hi_u32 v11, v10, v11 557; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 558; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 559; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 560; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 561; GISEL-NEXT: 
v_cndmask_b32_e64 v13, 0, 1, s[4:5] 562; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13 563; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 564; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 565; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 566; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 567; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 568; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v9 569; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc 570; GISEL-NEXT: v_addc_u32_e64 v5, vcc, 0, v5, s[4:5] 571; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 572; GISEL-NEXT: v_mul_lo_u32 v9, v2, v5 573; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 574; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 575; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 576; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 577; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 578; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 579; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5 580; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 581; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 582; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4 583; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 584; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 585; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 586; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 587; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 588; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 589; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 590; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 591; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 592; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4 593; GISEL-NEXT: v_mul_lo_u32 v9, v7, v4 594; GISEL-NEXT: v_mul_lo_u32 v10, v6, v5 595; GISEL-NEXT: v_mul_hi_u32 v11, v6, v4 596; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 597; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 598; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 599; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v9, vcc 600; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9 601; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v7 602; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 603; GISEL-NEXT: 
v_cmp_ge_u32_e64 s[4:5], v2, v6 604; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 605; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v7 606; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v10, s[4:5] 607; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v6 608; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc 609; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] 610; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 611; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v5, vcc 612; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v7 613; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 614; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 615; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 616; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 617; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc 618; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v9 619; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v10, vcc 620; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 621; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc 622; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v6, vcc 623; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 624; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 625; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc 626; GISEL-NEXT: s_setpc_b64 s[30:31] 627; 628; CGP-LABEL: v_udiv_v2i64: 629; CGP: ; %bb.0: 630; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 631; CGP-NEXT: v_mov_b32_e32 v8, v0 632; CGP-NEXT: v_mov_b32_e32 v9, v1 633; CGP-NEXT: v_or_b32_e32 v1, v9, v5 634; CGP-NEXT: v_mov_b32_e32 v0, 0 635; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 636; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 637; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 638; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 639; CGP-NEXT: s_cbranch_execz BB2_2 640; CGP-NEXT: ; %bb.1: 641; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 642; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5 643; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 644; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc 645; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 646; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 647; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 648; 
CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 649; CGP-NEXT: v_trunc_f32_e32 v1, v1 650; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 651; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1 652; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 653; CGP-NEXT: v_mul_lo_u32 v12, v10, v1 654; CGP-NEXT: v_mul_lo_u32 v13, v10, v0 655; CGP-NEXT: v_mul_lo_u32 v14, v11, v0 656; CGP-NEXT: v_mul_hi_u32 v15, v10, v0 657; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 658; CGP-NEXT: v_mul_lo_u32 v14, v1, v13 659; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 660; CGP-NEXT: v_mul_hi_u32 v13, v1, v13 661; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 662; CGP-NEXT: v_mul_lo_u32 v15, v0, v12 663; CGP-NEXT: v_mul_lo_u32 v17, v1, v12 664; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 665; CGP-NEXT: v_mul_hi_u32 v12, v1, v12 666; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 667; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 668; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 669; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 670; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 671; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 672; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 673; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 674; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 675; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 676; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 677; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 678; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 679; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 680; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 681; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], v1, v12, vcc 682; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v1, v12 683; CGP-NEXT: v_mul_lo_u32 v12, v10, v0 684; CGP-NEXT: v_mul_lo_u32 v11, v11, v0 685; CGP-NEXT: v_mul_hi_u32 v14, v10, v0 686; CGP-NEXT: v_mul_lo_u32 v10, v10, v13 687; CGP-NEXT: v_mul_lo_u32 v15, v13, v12 688; CGP-NEXT: v_mul_hi_u32 v16, v0, v12 689; CGP-NEXT: v_mul_hi_u32 v12, v13, v12 690; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 691; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14 692; CGP-NEXT: v_mul_lo_u32 
v11, v0, v10 693; CGP-NEXT: v_mul_lo_u32 v14, v13, v10 694; CGP-NEXT: v_mul_hi_u32 v17, v0, v10 695; CGP-NEXT: v_mul_hi_u32 v10, v13, v10 696; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v15, v11 697; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 698; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 699; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 700; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 701; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 702; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 703; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 704; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 705; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v15 706; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 707; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 708; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 709; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v12 710; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v10, vcc 711; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11 712; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 713; CGP-NEXT: v_mul_lo_u32 v10, v9, v0 714; CGP-NEXT: v_mul_hi_u32 v11, v8, v0 715; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 716; CGP-NEXT: v_mul_lo_u32 v12, v8, v1 717; CGP-NEXT: v_mul_lo_u32 v13, v9, v1 718; CGP-NEXT: v_mul_hi_u32 v14, v8, v1 719; CGP-NEXT: v_mul_hi_u32 v1, v9, v1 720; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 721; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 722; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 723; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 724; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 725; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 726; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 727; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 728; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 729; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 730; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 731; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 732; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 733; CGP-NEXT: v_mul_lo_u32 v11, v4, v0 734; CGP-NEXT: v_mul_lo_u32 v12, v5, v0 735; CGP-NEXT: 
v_mul_hi_u32 v13, v4, v0 736; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v10 737; CGP-NEXT: v_mul_lo_u32 v10, v4, v1 738; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v0 739; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v1, vcc 740; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 741; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v14 742; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc 743; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 744; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v11 745; CGP-NEXT: v_subb_u32_e64 v11, s[4:5], v9, v10, vcc 746; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v9, v10 747; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v4 748; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 749; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v5 750; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] 751; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v5, vcc 752; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v11, v5 753; CGP-NEXT: v_cndmask_b32_e32 v10, v13, v10, vcc 754; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v4 755; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc 756; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4 757; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc 758; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v5 759; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc 760; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v5 761; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc 762; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 763; CGP-NEXT: v_cndmask_b32_e32 v4, v14, v12, vcc 764; CGP-NEXT: v_cndmask_b32_e32 v5, v15, v16, vcc 765; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 766; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 767; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 768; CGP-NEXT: ; implicit-def: $vgpr4 769; CGP-NEXT: ; implicit-def: $vgpr8 770; CGP-NEXT: BB2_2: ; %Flow2 771; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] 772; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] 773; CGP-NEXT: s_cbranch_execz BB2_4 774; CGP-NEXT: ; %bb.3: 775; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 776; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4 777; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 778; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 779; 
CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 780; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 781; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 782; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 783; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 784; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 785; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v0 786; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v1 787; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 788; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 789; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v1, v4 790; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 791; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v0 792; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 793; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 794; CGP-NEXT: v_mov_b32_e32 v1, 0 795; CGP-NEXT: BB2_4: 796; CGP-NEXT: s_or_b64 exec, exec, s[6:7] 797; CGP-NEXT: v_or_b32_e32 v5, v3, v7 798; CGP-NEXT: v_mov_b32_e32 v4, 0 799; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 800; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5 801; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 802; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 803; CGP-NEXT: s_cbranch_execz BB2_6 804; CGP-NEXT: ; %bb.5: 805; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6 806; CGP-NEXT: v_cvt_f32_u32_e32 v5, v7 807; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 808; CGP-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc 809; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 810; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 811; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 812; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 813; CGP-NEXT: v_trunc_f32_e32 v5, v5 814; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 815; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 816; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 817; CGP-NEXT: v_mul_lo_u32 v10, v8, v5 818; CGP-NEXT: v_mul_lo_u32 v11, v8, v4 819; CGP-NEXT: v_mul_lo_u32 v12, v9, v4 820; CGP-NEXT: v_mul_hi_u32 v13, v8, v4 821; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 822; CGP-NEXT: v_mul_lo_u32 v12, v5, v11 823; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 824; CGP-NEXT: v_mul_hi_u32 v11, v5, v11 825; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 826; CGP-NEXT: 
v_mul_lo_u32 v13, v4, v10 827; CGP-NEXT: v_mul_lo_u32 v15, v5, v10 828; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 829; CGP-NEXT: v_mul_hi_u32 v10, v5, v10 830; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 831; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 832; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 833; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 834; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 835; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 836; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 837; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 838; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 839; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 840; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 841; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 842; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 843; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 844; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 845; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v5, v10, vcc 846; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v10 847; CGP-NEXT: v_mul_lo_u32 v10, v8, v4 848; CGP-NEXT: v_mul_lo_u32 v9, v9, v4 849; CGP-NEXT: v_mul_hi_u32 v12, v8, v4 850; CGP-NEXT: v_mul_lo_u32 v8, v8, v11 851; CGP-NEXT: v_mul_lo_u32 v13, v11, v10 852; CGP-NEXT: v_mul_hi_u32 v14, v4, v10 853; CGP-NEXT: v_mul_hi_u32 v10, v11, v10 854; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 855; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v12 856; CGP-NEXT: v_mul_lo_u32 v9, v4, v8 857; CGP-NEXT: v_mul_lo_u32 v12, v11, v8 858; CGP-NEXT: v_mul_hi_u32 v15, v4, v8 859; CGP-NEXT: v_mul_hi_u32 v8, v11, v8 860; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 861; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 862; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 863; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 864; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v9, v14 865; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 866; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 867; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 868; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 869; CGP-NEXT: v_add_i32_e64 v11, 
s[4:5], v12, v13 870; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v10, v9 871; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 872; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 873; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 874; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc 875; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 876; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 877; CGP-NEXT: v_mul_lo_u32 v8, v3, v4 878; CGP-NEXT: v_mul_hi_u32 v9, v2, v4 879; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 880; CGP-NEXT: v_mul_lo_u32 v10, v2, v5 881; CGP-NEXT: v_mul_lo_u32 v11, v3, v5 882; CGP-NEXT: v_mul_hi_u32 v12, v2, v5 883; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 884; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 885; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 886; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 887; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 888; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 889; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 890; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 891; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 892; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 893; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 894; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 895; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 896; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 897; CGP-NEXT: v_mul_lo_u32 v9, v6, v4 898; CGP-NEXT: v_mul_lo_u32 v10, v7, v4 899; CGP-NEXT: v_mul_hi_u32 v11, v6, v4 900; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 901; CGP-NEXT: v_mul_lo_u32 v8, v6, v5 902; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v4 903; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v5, vcc 904; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 905; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v12 906; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc 907; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 908; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 909; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v3, v8, vcc 910; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 911; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 912; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 913; CGP-NEXT: 
v_cmp_ge_u32_e64 s[4:5], v9, v7 914; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] 915; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc 916; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v7 917; CGP-NEXT: v_cndmask_b32_e32 v8, v11, v8, vcc 918; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 919; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 920; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 921; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 922; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v7 923; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc 924; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 925; CGP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc 926; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 927; CGP-NEXT: v_cndmask_b32_e32 v2, v12, v10, vcc 928; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v14, vcc 929; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 930; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc 931; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v3, vcc 932; CGP-NEXT: ; implicit-def: $vgpr6 933; CGP-NEXT: ; implicit-def: $vgpr2 934; CGP-NEXT: BB2_6: ; %Flow 935; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] 936; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] 937; CGP-NEXT: s_cbranch_execz BB2_8 938; CGP-NEXT: ; %bb.7: 939; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 940; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v6 941; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 942; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 943; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 944; CGP-NEXT: v_mul_lo_u32 v4, v4, v3 945; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 946; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 947; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 948; CGP-NEXT: v_mul_lo_u32 v4, v3, v6 949; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3 950; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 951; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 952; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 953; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v6 954; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 955; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v3 956; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 957; CGP-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc 958; CGP-NEXT: 
v_mov_b32_e32 v5, 0 959; CGP-NEXT: BB2_8: 960; CGP-NEXT: s_or_b64 exec, exec, s[6:7] 961; CGP-NEXT: v_mov_b32_e32 v2, v4 962; CGP-NEXT: v_mov_b32_e32 v3, v5 963; CGP-NEXT: s_setpc_b64 s[30:31] 964 %result = udiv <2 x i64> %num, %den 965 ret <2 x i64> %result 966} 967; Scalar i64 unsigned division by the power-of-two constant 4096. Both RUN configurations are expected to produce identical output for this function, and that output is the full reciprocal-based 64-bit division expansion (no shift instruction appears in the expected code). The assertions below are autogenerated. Regenerate them with utils/update_llc_test_checks.py rather than editing them by hand. 968define i64 @v_udiv_i64_pow2k_denom(i64 %num) { 969; CHECK-LABEL: v_udiv_i64_pow2k_denom: 970; CHECK: ; %bb.0: 971; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 972; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 973; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 974; CHECK-NEXT: s_movk_i32 s6, 0xf000 975; CHECK-NEXT: s_movk_i32 s7, 0x1000 976; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 977; CHECK-NEXT: s_bfe_i32 s5, -1, 0x10000 978; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 979; CHECK-NEXT: v_mov_b32_e32 v3, s4 980; CHECK-NEXT: v_mov_b32_e32 v4, s5 981; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 982; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 983; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2 984; CHECK-NEXT: v_trunc_f32_e32 v5, v5 985; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v5 986; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 987; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 988; CHECK-NEXT: v_mul_lo_u32 v6, s6, v5 989; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 990; CHECK-NEXT: v_mul_lo_u32 v8, -1, v2 991; CHECK-NEXT: v_mul_hi_u32 v9, s6, v2 992; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 993; CHECK-NEXT: v_mul_lo_u32 v8, v5, v7 994; CHECK-NEXT: v_mul_hi_u32 v10, v2, v7 995; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7 996; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 997; CHECK-NEXT: v_mul_lo_u32 v9, v2, v6 998; CHECK-NEXT: v_mul_lo_u32 v11, v5, v6 999; CHECK-NEXT: v_mul_hi_u32 v12, v2, v6 1000; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6 1001; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 1002; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1003; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v7 1004; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1005; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 1006; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1007; CHECK-NEXT: 
v_add_i32_e32 v7, vcc, v7, v12 1008; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1009; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 1010; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v10 1011; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 1012; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1013; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 1014; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 1015; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 1016; CHECK-NEXT: v_addc_u32_e64 v7, s[4:5], v5, v6, vcc 1017; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 1018; CHECK-NEXT: v_mul_lo_u32 v6, s6, v2 1019; CHECK-NEXT: v_mul_lo_u32 v8, -1, v2 1020; CHECK-NEXT: v_mul_hi_u32 v9, s6, v2 1021; CHECK-NEXT: v_mul_lo_u32 v10, s6, v7 1022; CHECK-NEXT: v_mul_lo_u32 v11, v7, v6 1023; CHECK-NEXT: v_mul_hi_u32 v12, v2, v6 1024; CHECK-NEXT: v_mul_hi_u32 v6, v7, v6 1025; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 1026; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 1027; CHECK-NEXT: v_mul_lo_u32 v9, v2, v8 1028; CHECK-NEXT: v_mul_lo_u32 v10, v7, v8 1029; CHECK-NEXT: v_mul_hi_u32 v13, v2, v8 1030; CHECK-NEXT: v_mul_hi_u32 v7, v7, v8 1031; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v11, v9 1032; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 1033; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v10, v6 1034; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 1035; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v12 1036; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 1037; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v13 1038; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 1039; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 1040; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v11 1041; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 1042; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 1043; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 1044; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 1045; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc 1046; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 1047; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 1048; 
CHECK-NEXT: v_mul_lo_u32 v6, v1, v2 1049; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 1050; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 1051; CHECK-NEXT: v_mul_lo_u32 v8, v0, v5 1052; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5 1053; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 1054; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5 1055; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 1056; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1057; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 1058; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1059; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 1060; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1061; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 1062; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1063; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 1064; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 1065; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 1066; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1067; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 1068; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2 1069; CHECK-NEXT: v_mul_lo_u32 v8, 0, v2 1070; CHECK-NEXT: v_mul_hi_u32 v9, s7, v2 1071; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 1072; CHECK-NEXT: v_mul_lo_u32 v6, s7, v5 1073; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v2 1074; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v5, vcc 1075; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 1076; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10 1077; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc 1078; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 1079; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 1080; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc 1081; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 1082; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 1083; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 1084; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 1085; CHECK-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5] 1086; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1087; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 1088; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1089; CHECK-NEXT: 
v_cmp_le_u32_e32 vcc, s7, v0 1090; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 1091; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1092; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 1093; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 1094; CHECK-NEXT: v_cndmask_b32_e32 v0, v10, v8, vcc 1095; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v12, vcc 1096; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 1097; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 1098; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 1099; CHECK-NEXT: s_setpc_b64 s[30:31] 1100 %result = udiv i64 %num, 4096 ; The divisor 4096 equals 0x1000, i.e. 2^12. 1101 ret i64 %result 1102} 1103 1104define <2 x i64> @v_udiv_v2i64_pow2k_denom(<2 x i64> %num) { 1105; GISEL-LABEL: v_udiv_v2i64_pow2k_denom: 1106; GISEL: ; %bb.0: 1107; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1108; GISEL-NEXT: s_movk_i32 s12, 0x1000 1109; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s12 1110; GISEL-NEXT: s_sub_u32 s8, 0, s12 1111; GISEL-NEXT: s_cselect_b32 s4, 1, 0 1112; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 1113; GISEL-NEXT: v_mov_b32_e32 v6, v4 1114; GISEL-NEXT: s_and_b32 s4, s4, 1 1115; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 1116; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 1117; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 1118; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6 1119; GISEL-NEXT: s_cmp_lg_u32 s4, 0 1120; GISEL-NEXT: s_subb_u32 s9, 0, 0 1121; GISEL-NEXT: s_bfe_i32 s10, -1, 0x10000 1122; GISEL-NEXT: s_bfe_i32 s11, -1, 0x10000 1123; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 1124; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 1125; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 1126; GISEL-NEXT: s_sub_u32 s13, 0, s12 1127; GISEL-NEXT: s_cselect_b32 s4, 1, 0 1128; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 1129; GISEL-NEXT: v_trunc_f32_e32 v6, v6 1130; GISEL-NEXT: s_and_b32 s4, s4, 1 1131; GISEL-NEXT: v_trunc_f32_e32 v7, v7 1132; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 1133; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 1134; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 1135; GISEL-NEXT: 
v_cvt_u32_f32_e32 v7, v7 1136; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 1137; GISEL-NEXT: s_cmp_lg_u32 s4, 0 1138; GISEL-NEXT: s_subb_u32 s6, 0, 0 1139; GISEL-NEXT: v_mul_lo_u32 v8, s13, v6 1140; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 1141; GISEL-NEXT: v_mul_lo_u32 v9, s8, v7 1142; GISEL-NEXT: v_mul_lo_u32 v10, s13, v4 1143; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 1144; GISEL-NEXT: v_mul_hi_u32 v12, s13, v4 1145; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5 1146; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 1147; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 1148; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 1149; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10 1150; GISEL-NEXT: v_mul_hi_u32 v16, v4, v10 1151; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 1152; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 1153; GISEL-NEXT: v_mul_lo_u32 v14, v7, v13 1154; GISEL-NEXT: v_mul_hi_u32 v17, v5, v13 1155; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13 1156; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 1157; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 1158; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 1159; GISEL-NEXT: v_mul_lo_u32 v15, v6, v8 1160; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 1161; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 1162; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9 1163; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19 1164; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 1165; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 1166; GISEL-NEXT: v_mul_lo_u32 v14, v7, v9 1167; GISEL-NEXT: v_mul_hi_u32 v17, v5, v9 1168; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 1169; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 1170; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 1171; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 1172; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 1173; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 1174; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 1175; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 1176; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 1177; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 1178; GISEL-NEXT: 
v_cndmask_b32_e64 v16, 0, 1, s[4:5] 1179; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1180; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 1181; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 1182; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1183; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v16 1184; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18 1185; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 1186; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 1187; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1188; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 1189; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1190; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1191; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v15 1192; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 1193; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 1194; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 1195; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc 1196; GISEL-NEXT: v_mul_lo_u32 v11, s13, v4 1197; GISEL-NEXT: v_mul_lo_u32 v12, s6, v4 1198; GISEL-NEXT: v_mul_hi_u32 v14, s13, v4 1199; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 1200; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] 1201; GISEL-NEXT: v_mul_lo_u32 v15, s8, v5 1202; GISEL-NEXT: v_mul_lo_u32 v16, s9, v5 1203; GISEL-NEXT: v_mul_hi_u32 v17, s8, v5 1204; GISEL-NEXT: v_mul_lo_u32 v18, s8, v13 1205; GISEL-NEXT: v_mul_lo_u32 v19, v13, v15 1206; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 1207; GISEL-NEXT: v_mul_hi_u32 v18, v5, v15 1208; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17 1209; GISEL-NEXT: v_mul_lo_u32 v17, v5, v16 1210; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 1211; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 1212; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18 1213; GISEL-NEXT: v_mul_lo_u32 v17, s13, v10 1214; GISEL-NEXT: v_mul_lo_u32 v18, v10, v11 1215; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v12, v17 1216; GISEL-NEXT: v_mul_hi_u32 v17, v4, v11 1217; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v12, v14 1218; GISEL-NEXT: v_mul_lo_u32 
v14, v4, v12 1219; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v18, v14 1220; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 1221; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v14, v17 1222; GISEL-NEXT: v_mov_b32_e32 v14, s10 1223; GISEL-NEXT: v_mov_b32_e32 v17, s11 1224; GISEL-NEXT: s_bfe_i32 s13, -1, 0x10000 1225; GISEL-NEXT: s_bfe_i32 s14, -1, 0x10000 1226; GISEL-NEXT: v_add_i32_e64 v6, s[10:11], v6, v8 1227; GISEL-NEXT: v_mov_b32_e32 v8, s13 1228; GISEL-NEXT: v_add_i32_e64 v7, s[10:11], v7, v9 1229; GISEL-NEXT: v_mul_hi_u32 v9, v10, v11 1230; GISEL-NEXT: v_mul_hi_u32 v11, v13, v15 1231; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] 1232; GISEL-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15 1233; GISEL-NEXT: v_mul_lo_u32 v18, v10, v12 1234; GISEL-NEXT: v_mul_hi_u32 v10, v10, v12 1235; GISEL-NEXT: v_mul_hi_u32 v12, v4, v12 1236; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v18, v9 1237; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 1238; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v9, v12 1239; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] 1240; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v18, v12 1241; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] 1242; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v19, v18 1243; GISEL-NEXT: v_mul_lo_u32 v19, v13, v16 1244; GISEL-NEXT: v_mul_hi_u32 v13, v13, v16 1245; GISEL-NEXT: v_mul_hi_u32 v16, v5, v16 1246; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v19, v11 1247; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 1248; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 1249; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] 1250; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v19, v16 1251; GISEL-NEXT: v_mov_b32_e32 v19, s14 1252; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15 1253; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] 1254; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v18 1255; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] 1256; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v12, v15 1257; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v18 1258; GISEL-NEXT: v_add_i32_e64 
v10, s[6:7], v10, v12 1259; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v13, v15 1260; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc 1261; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v12, s[4:5] 1262; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 1263; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc 1264; GISEL-NEXT: v_mul_lo_u32 v9, v3, v4 1265; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 1266; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 1267; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11 1268; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc 1269; GISEL-NEXT: v_mul_lo_u32 v11, v1, v5 1270; GISEL-NEXT: v_mul_hi_u32 v12, v0, v5 1271; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 1272; GISEL-NEXT: v_mul_lo_u32 v13, v2, v6 1273; GISEL-NEXT: v_mul_lo_u32 v15, v3, v6 1274; GISEL-NEXT: v_mul_hi_u32 v16, v2, v6 1275; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 1276; GISEL-NEXT: v_mul_lo_u32 v18, v0, v7 1277; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18 1278; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1279; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1280; GISEL-NEXT: v_mul_lo_u32 v11, v1, v7 1281; GISEL-NEXT: v_mul_hi_u32 v12, v0, v7 1282; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 1283; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 1284; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 1285; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v15, v4 1286; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 1287; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v11, v5 1288; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 1289; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v10 1290; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 1291; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v16 1292; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 1293; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1294; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12 1295; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1296; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 1297; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 1298; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v16 1299; GISEL-NEXT: 
v_add_i32_e32 v11, vcc, v11, v12 1300; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 1301; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1302; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13 1303; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1304; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 1305; GISEL-NEXT: v_mul_lo_u32 v10, s12, v4 1306; GISEL-NEXT: v_mul_lo_u32 v13, 0, v4 1307; GISEL-NEXT: v_mul_hi_u32 v15, s12, v4 1308; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1309; GISEL-NEXT: v_mul_lo_u32 v12, s12, v5 1310; GISEL-NEXT: v_mul_lo_u32 v16, 0, v5 1311; GISEL-NEXT: v_mul_hi_u32 v18, s12, v5 1312; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 1313; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 1314; GISEL-NEXT: v_mul_lo_u32 v9, s12, v6 1315; GISEL-NEXT: v_mul_lo_u32 v11, s12, v7 1316; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 1317; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 1318; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v4 1319; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v6, vcc 1320; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 1321; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18 1322; GISEL-NEXT: v_add_i32_e32 v15, vcc, 1, v5 1323; GISEL-NEXT: v_addc_u32_e32 v18, vcc, 0, v7, vcc 1324; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 1325; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v9, vcc 1326; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9 1327; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2 1328; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 1329; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v12 1330; GISEL-NEXT: v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5] 1331; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v11 1332; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10 1333; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], 1, v13 1334; GISEL-NEXT: v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9] 1335; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7] 1336; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s12, v0 1337; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7] 1338; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v12 1339; GISEL-NEXT: 
v_cndmask_b32_e64 v9, v14, v9, s[6:7] 1340; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], 1, v15 1341; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7] 1342; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 1343; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] 1344; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s12, v2 1345; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 1346; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s12, v2 1347; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 1348; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s12, v0 1349; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1350; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 1351; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 1352; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 1353; GISEL-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc 1354; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1355; GISEL-NEXT: v_cndmask_b32_e32 v0, v17, v0, vcc 1356; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 1357; GISEL-NEXT: v_cndmask_b32_e32 v1, v13, v10, vcc 1358; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 1359; GISEL-NEXT: v_cndmask_b32_e64 v0, v15, v12, s[4:5] 1360; GISEL-NEXT: v_cndmask_b32_e32 v3, v16, v11, vcc 1361; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 1362; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc 1363; GISEL-NEXT: v_cndmask_b32_e64 v1, v18, v14, s[4:5] 1364; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9 1365; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] 1366; GISEL-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5] 1367; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc 1368; GISEL-NEXT: s_setpc_b64 s[30:31] 1369; 1370; CGP-LABEL: v_udiv_v2i64_pow2k_denom: 1371; CGP: ; %bb.0: 1372; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1373; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000 1374; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 1375; CGP-NEXT: s_movk_i32 s8, 0xf000 1376; CGP-NEXT: s_movk_i32 s12, 0x1000 1377; CGP-NEXT: s_bfe_i32 s10, -1, 0x10000 1378; CGP-NEXT: s_bfe_i32 s11, -1, 0x10000 1379; CGP-NEXT: s_bfe_i32 s13, -1, 0x10000 1380; CGP-NEXT: s_bfe_i32 s14, -1, 
0x10000 1381; CGP-NEXT: v_mov_b32_e32 v6, v4 1382; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 1383; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 1384; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 1385; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 1386; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 1387; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 1388; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 1389; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 1390; CGP-NEXT: v_trunc_f32_e32 v6, v6 1391; CGP-NEXT: v_trunc_f32_e32 v7, v7 1392; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 1393; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 1394; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 1395; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 1396; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 1397; CGP-NEXT: v_mul_lo_u32 v8, s8, v6 1398; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 1399; CGP-NEXT: v_mul_lo_u32 v9, s8, v7 1400; CGP-NEXT: v_mul_lo_u32 v10, s8, v4 1401; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 1402; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 1403; CGP-NEXT: v_mul_lo_u32 v13, s8, v5 1404; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 1405; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 1406; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 1407; CGP-NEXT: v_mul_lo_u32 v11, v6, v10 1408; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 1409; CGP-NEXT: v_mul_hi_u32 v10, v6, v10 1410; CGP-NEXT: v_add_i32_e32 v9, vcc, v14, v9 1411; CGP-NEXT: v_mul_lo_u32 v14, v7, v13 1412; CGP-NEXT: v_mul_hi_u32 v17, v5, v13 1413; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 1414; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 1415; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 1416; CGP-NEXT: v_mul_lo_u32 v12, v4, v8 1417; CGP-NEXT: v_mul_lo_u32 v15, v6, v8 1418; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 1419; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 1420; CGP-NEXT: v_mul_lo_u32 v19, v5, v9 1421; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 1422; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 1423; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 1424; CGP-NEXT: v_mul_lo_u32 v14, v7, v9 1425; CGP-NEXT: v_mul_hi_u32 v17, v5, v9 1426; CGP-NEXT: v_mul_hi_u32 v9, 
v7, v9 1427; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 1428; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 1429; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 1430; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 1431; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 1432; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 1433; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 1434; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 1435; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 1436; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] 1437; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1438; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 1439; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 1440; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1441; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v16 1442; CGP-NEXT: v_add_i32_e32 v15, vcc, v19, v18 1443; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 1444; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 1445; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1446; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 1447; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1448; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1449; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v15 1450; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 1451; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 1452; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 1453; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc 1454; CGP-NEXT: v_mul_lo_u32 v11, s8, v4 1455; CGP-NEXT: v_mul_lo_u32 v12, -1, v4 1456; CGP-NEXT: v_mul_hi_u32 v14, s8, v4 1457; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 1458; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] 1459; CGP-NEXT: v_mul_lo_u32 v15, s8, v5 1460; CGP-NEXT: v_mul_lo_u32 v16, -1, v5 1461; CGP-NEXT: v_mul_hi_u32 v17, s8, v5 1462; CGP-NEXT: v_mul_lo_u32 v18, s8, v13 1463; CGP-NEXT: v_mul_lo_u32 v19, v13, v15 1464; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 1465; CGP-NEXT: v_mul_hi_u32 v18, v5, v15 1466; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17 1467; CGP-NEXT: v_mul_lo_u32 v17, v5, v16 1468; 
CGP-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 1469; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 1470; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18 1471; CGP-NEXT: v_mul_lo_u32 v17, s8, v10 1472; CGP-NEXT: v_mul_lo_u32 v18, v10, v11 1473; CGP-NEXT: v_add_i32_e64 v12, s[8:9], v12, v17 1474; CGP-NEXT: v_mul_hi_u32 v17, v4, v11 1475; CGP-NEXT: v_add_i32_e64 v12, s[8:9], v12, v14 1476; CGP-NEXT: v_mul_lo_u32 v14, v4, v12 1477; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v18, v14 1478; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 1479; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v17 1480; CGP-NEXT: v_mov_b32_e32 v14, s10 1481; CGP-NEXT: v_mov_b32_e32 v17, s11 1482; CGP-NEXT: v_add_i32_e64 v6, s[10:11], v6, v8 1483; CGP-NEXT: v_mov_b32_e32 v8, s13 1484; CGP-NEXT: v_add_i32_e64 v7, s[10:11], v7, v9 1485; CGP-NEXT: v_mul_hi_u32 v9, v10, v11 1486; CGP-NEXT: v_mul_hi_u32 v11, v13, v15 1487; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] 1488; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15 1489; CGP-NEXT: v_mul_lo_u32 v18, v10, v12 1490; CGP-NEXT: v_mul_hi_u32 v10, v10, v12 1491; CGP-NEXT: v_mul_hi_u32 v12, v4, v12 1492; CGP-NEXT: v_add_i32_e64 v9, s[8:9], v18, v9 1493; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 1494; CGP-NEXT: v_add_i32_e64 v9, s[8:9], v9, v12 1495; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] 1496; CGP-NEXT: v_add_i32_e64 v12, s[8:9], v18, v12 1497; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] 1498; CGP-NEXT: v_add_i32_e64 v18, s[6:7], v19, v18 1499; CGP-NEXT: v_mul_lo_u32 v19, v13, v16 1500; CGP-NEXT: v_mul_hi_u32 v13, v13, v16 1501; CGP-NEXT: v_mul_hi_u32 v16, v5, v16 1502; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v19, v11 1503; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 1504; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 1505; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] 1506; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v19, v16 1507; CGP-NEXT: v_mov_b32_e32 v19, s14 1508; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15 1509; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, 
s[6:7] 1510; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v18 1511; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] 1512; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v12, v15 1513; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v18 1514; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 1515; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v13, v15 1516; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc 1517; CGP-NEXT: v_addc_u32_e64 v7, vcc, v7, v12, s[4:5] 1518; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 1519; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc 1520; CGP-NEXT: v_mul_lo_u32 v9, v3, v4 1521; CGP-NEXT: v_mul_hi_u32 v10, v2, v4 1522; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 1523; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11 1524; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc 1525; CGP-NEXT: v_mul_lo_u32 v11, v1, v5 1526; CGP-NEXT: v_mul_hi_u32 v12, v0, v5 1527; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 1528; CGP-NEXT: v_mul_lo_u32 v13, v2, v6 1529; CGP-NEXT: v_mul_lo_u32 v15, v3, v6 1530; CGP-NEXT: v_mul_hi_u32 v16, v2, v6 1531; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 1532; CGP-NEXT: v_mul_lo_u32 v18, v0, v7 1533; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v18 1534; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1535; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1536; CGP-NEXT: v_mul_lo_u32 v11, v1, v7 1537; CGP-NEXT: v_mul_hi_u32 v12, v0, v7 1538; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 1539; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 1540; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 1541; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v15, v4 1542; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 1543; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v11, v5 1544; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 1545; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v9, v10 1546; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 1547; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v16 1548; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 1549; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1550; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 1551; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1552; 
CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 1553; CGP-NEXT: v_add_i32_e32 v10, vcc, v15, v10 1554; CGP-NEXT: v_add_i32_e32 v13, vcc, v18, v16 1555; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1556; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 1557; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1558; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13 1559; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1560; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 1561; CGP-NEXT: v_mul_lo_u32 v10, s12, v4 1562; CGP-NEXT: v_mul_lo_u32 v13, 0, v4 1563; CGP-NEXT: v_mul_hi_u32 v15, s12, v4 1564; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1565; CGP-NEXT: v_mul_lo_u32 v12, s12, v5 1566; CGP-NEXT: v_mul_lo_u32 v16, 0, v5 1567; CGP-NEXT: v_mul_hi_u32 v18, s12, v5 1568; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9 1569; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 1570; CGP-NEXT: v_mul_lo_u32 v9, s12, v6 1571; CGP-NEXT: v_mul_lo_u32 v11, s12, v7 1572; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 1573; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 1574; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v4 1575; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v6, vcc 1576; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 1577; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v18 1578; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v5 1579; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v7, vcc 1580; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 1581; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v9, vcc 1582; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9 1583; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2 1584; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 1585; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v12 1586; CGP-NEXT: v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5] 1587; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v11 1588; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10 1589; CGP-NEXT: v_add_i32_e64 v10, s[8:9], 1, v13 1590; CGP-NEXT: v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9] 1591; CGP-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7] 1592; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s12, v0 1593; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 
-1, s[6:7] 1594; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v12 1595; CGP-NEXT: v_cndmask_b32_e64 v9, v14, v9, s[6:7] 1596; CGP-NEXT: v_add_i32_e64 v12, s[6:7], 1, v15 1597; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7] 1598; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 1599; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] 1600; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s12, v2 1601; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 1602; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v2 1603; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 1604; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s12, v0 1605; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1606; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 1607; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 1608; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 1609; CGP-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc 1610; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1611; CGP-NEXT: v_cndmask_b32_e32 v0, v17, v0, vcc 1612; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 1613; CGP-NEXT: v_cndmask_b32_e32 v1, v13, v10, vcc 1614; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 1615; CGP-NEXT: v_cndmask_b32_e64 v0, v15, v12, s[4:5] 1616; CGP-NEXT: v_cndmask_b32_e32 v3, v16, v11, vcc 1617; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 1618; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc 1619; CGP-NEXT: v_cndmask_b32_e64 v1, v18, v14, s[4:5] 1620; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9 1621; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] 1622; CGP-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5] 1623; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc 1624; CGP-NEXT: s_setpc_b64 s[30:31] 1625 %result = udiv <2 x i64> %num, <i64 4096, i64 4096> 1626 ret <2 x i64> %result 1627} 1628
; Test: scalar i64 unsigned division by the odd constant 1235195 (0x12d8fb).
; The IR body is a single udiv of %num by that constant, returned unchanged.
; The assertions for this function use the check prefix that both RUN lines
; share, i.e. the same instruction sequence is expected with the
; AMDGPUCodeGenPrepare idiv expansion disabled and enabled.
; Constants visible in the expected output: s7 holds the divisor 0x12d8fb and
; s6 holds 0xffed2705, the low 32 bits of the negated divisor (-1235195).
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1629define i64 @v_udiv_i64_oddk_denom(i64 %num) { 1630; CHECK-LABEL: v_udiv_i64_oddk_denom: 1631; CHECK: ; %bb.0: 1632; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1633; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb 1634; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 1635; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 1636; CHECK-NEXT: s_mov_b32 s7, 
0x12d8fb 1637; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 1638; CHECK-NEXT: s_bfe_i32 s5, -1, 0x10000 1639; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 1640; CHECK-NEXT: v_mov_b32_e32 v3, s4 1641; CHECK-NEXT: v_mov_b32_e32 v4, s5 1642; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 1643; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 1644; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2 1645; CHECK-NEXT: v_trunc_f32_e32 v5, v5 1646; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v5 1647; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 1648; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 1649; CHECK-NEXT: v_mul_lo_u32 v6, s6, v5 1650; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 1651; CHECK-NEXT: v_mul_lo_u32 v8, -1, v2 1652; CHECK-NEXT: v_mul_hi_u32 v9, s6, v2 1653; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 1654; CHECK-NEXT: v_mul_lo_u32 v8, v5, v7 1655; CHECK-NEXT: v_mul_hi_u32 v10, v2, v7 1656; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7 1657; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 1658; CHECK-NEXT: v_mul_lo_u32 v9, v2, v6 1659; CHECK-NEXT: v_mul_lo_u32 v11, v5, v6 1660; CHECK-NEXT: v_mul_hi_u32 v12, v2, v6 1661; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6 1662; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 1663; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1664; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v7 1665; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1666; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 1667; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1668; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12 1669; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1670; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 1671; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v10 1672; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 1673; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1674; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 1675; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 1676; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 1677; CHECK-NEXT: v_addc_u32_e64 v7, s[4:5], v5, v6, vcc 1678; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 1679; CHECK-NEXT: v_mul_lo_u32 v6, s6, v2 
1680; CHECK-NEXT: v_mul_lo_u32 v8, -1, v2 1681; CHECK-NEXT: v_mul_hi_u32 v9, s6, v2 1682; CHECK-NEXT: v_mul_lo_u32 v10, s6, v7 1683; CHECK-NEXT: v_mul_lo_u32 v11, v7, v6 1684; CHECK-NEXT: v_mul_hi_u32 v12, v2, v6 1685; CHECK-NEXT: v_mul_hi_u32 v6, v7, v6 1686; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 1687; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 1688; CHECK-NEXT: v_mul_lo_u32 v9, v2, v8 1689; CHECK-NEXT: v_mul_lo_u32 v10, v7, v8 1690; CHECK-NEXT: v_mul_hi_u32 v13, v2, v8 1691; CHECK-NEXT: v_mul_hi_u32 v7, v7, v8 1692; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v11, v9 1693; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 1694; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v10, v6 1695; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 1696; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v12 1697; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 1698; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v13 1699; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 1700; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 1701; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v11 1702; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 1703; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 1704; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 1705; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 1706; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc 1707; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 1708; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 1709; CHECK-NEXT: v_mul_lo_u32 v6, v1, v2 1710; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 1711; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 1712; CHECK-NEXT: v_mul_lo_u32 v8, v0, v5 1713; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5 1714; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 1715; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5 1716; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 1717; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1718; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 1719; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1720; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 1721; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, 
vcc 1722; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 1723; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1724; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 1725; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 1726; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 1727; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1728; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 1729; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2 1730; CHECK-NEXT: v_mul_lo_u32 v8, 0, v2 1731; CHECK-NEXT: v_mul_hi_u32 v9, s7, v2 1732; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 1733; CHECK-NEXT: v_mul_lo_u32 v6, s7, v5 1734; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v2 1735; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v5, vcc 1736; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 1737; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10 1738; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc 1739; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 1740; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 1741; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc 1742; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 1743; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 1744; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 1745; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 1746; CHECK-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5] 1747; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1748; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 1749; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1750; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 1751; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 1752; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1753; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 1754; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 1755; CHECK-NEXT: v_cndmask_b32_e32 v0, v10, v8, vcc 1756; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v12, vcc 1757; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 1758; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 1759; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 1760; CHECK-NEXT: s_setpc_b64 s[30:31] 1761 %result = udiv i64 %num, 1235195 1762 ret i64 %result 1763} 1764 
1765define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) { 1766; GISEL-LABEL: v_udiv_v2i64_oddk_denom: 1767; GISEL: ; %bb.0: 1768; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1769; GISEL-NEXT: s_mov_b32 s12, 0x12d8fb 1770; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s12 1771; GISEL-NEXT: s_sub_u32 s8, 0, s12 1772; GISEL-NEXT: s_cselect_b32 s4, 1, 0 1773; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 1774; GISEL-NEXT: v_mov_b32_e32 v6, v4 1775; GISEL-NEXT: s_and_b32 s4, s4, 1 1776; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 1777; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 1778; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 1779; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6 1780; GISEL-NEXT: s_cmp_lg_u32 s4, 0 1781; GISEL-NEXT: s_subb_u32 s9, 0, 0 1782; GISEL-NEXT: s_bfe_i32 s10, -1, 0x10000 1783; GISEL-NEXT: s_bfe_i32 s11, -1, 0x10000 1784; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 1785; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 1786; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 1787; GISEL-NEXT: s_sub_u32 s13, 0, s12 1788; GISEL-NEXT: s_cselect_b32 s4, 1, 0 1789; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 1790; GISEL-NEXT: v_trunc_f32_e32 v6, v6 1791; GISEL-NEXT: s_and_b32 s4, s4, 1 1792; GISEL-NEXT: v_trunc_f32_e32 v7, v7 1793; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 1794; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 1795; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 1796; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 1797; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 1798; GISEL-NEXT: s_cmp_lg_u32 s4, 0 1799; GISEL-NEXT: s_subb_u32 s6, 0, 0 1800; GISEL-NEXT: v_mul_lo_u32 v8, s13, v6 1801; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 1802; GISEL-NEXT: v_mul_lo_u32 v9, s8, v7 1803; GISEL-NEXT: v_mul_lo_u32 v10, s13, v4 1804; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 1805; GISEL-NEXT: v_mul_hi_u32 v12, s13, v4 1806; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5 1807; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 1808; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 1809; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 1810; GISEL-NEXT: 
v_mul_lo_u32 v11, v6, v10 1811; GISEL-NEXT: v_mul_hi_u32 v16, v4, v10 1812; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 1813; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 1814; GISEL-NEXT: v_mul_lo_u32 v14, v7, v13 1815; GISEL-NEXT: v_mul_hi_u32 v17, v5, v13 1816; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13 1817; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 1818; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 1819; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 1820; GISEL-NEXT: v_mul_lo_u32 v15, v6, v8 1821; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 1822; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 1823; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9 1824; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19 1825; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 1826; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 1827; GISEL-NEXT: v_mul_lo_u32 v14, v7, v9 1828; GISEL-NEXT: v_mul_hi_u32 v17, v5, v9 1829; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 1830; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 1831; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 1832; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 1833; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 1834; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 1835; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 1836; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 1837; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 1838; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 1839; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] 1840; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1841; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 1842; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 1843; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1844; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v16 1845; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18 1846; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 1847; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 1848; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1849; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 1850; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1851; 
GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1852; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v15 1853; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 1854; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 1855; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 1856; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc 1857; GISEL-NEXT: v_mul_lo_u32 v11, s13, v4 1858; GISEL-NEXT: v_mul_lo_u32 v12, s6, v4 1859; GISEL-NEXT: v_mul_hi_u32 v14, s13, v4 1860; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 1861; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] 1862; GISEL-NEXT: v_mul_lo_u32 v15, s8, v5 1863; GISEL-NEXT: v_mul_lo_u32 v16, s9, v5 1864; GISEL-NEXT: v_mul_hi_u32 v17, s8, v5 1865; GISEL-NEXT: v_mul_lo_u32 v18, s8, v13 1866; GISEL-NEXT: v_mul_lo_u32 v19, v13, v15 1867; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 1868; GISEL-NEXT: v_mul_hi_u32 v18, v5, v15 1869; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17 1870; GISEL-NEXT: v_mul_lo_u32 v17, v5, v16 1871; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 1872; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 1873; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18 1874; GISEL-NEXT: v_mul_lo_u32 v17, s13, v10 1875; GISEL-NEXT: v_mul_lo_u32 v18, v10, v11 1876; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v12, v17 1877; GISEL-NEXT: v_mul_hi_u32 v17, v4, v11 1878; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v12, v14 1879; GISEL-NEXT: v_mul_lo_u32 v14, v4, v12 1880; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v18, v14 1881; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 1882; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v14, v17 1883; GISEL-NEXT: v_mov_b32_e32 v14, s10 1884; GISEL-NEXT: v_mov_b32_e32 v17, s11 1885; GISEL-NEXT: s_bfe_i32 s13, -1, 0x10000 1886; GISEL-NEXT: s_bfe_i32 s14, -1, 0x10000 1887; GISEL-NEXT: v_add_i32_e64 v6, s[10:11], v6, v8 1888; GISEL-NEXT: v_mov_b32_e32 v8, s13 1889; GISEL-NEXT: v_add_i32_e64 v7, s[10:11], v7, v9 1890; GISEL-NEXT: v_mul_hi_u32 v9, v10, v11 1891; GISEL-NEXT: v_mul_hi_u32 v11, v13, v15 1892; GISEL-NEXT: 
v_cndmask_b32_e64 v15, 0, 1, s[8:9] 1893; GISEL-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15 1894; GISEL-NEXT: v_mul_lo_u32 v18, v10, v12 1895; GISEL-NEXT: v_mul_hi_u32 v10, v10, v12 1896; GISEL-NEXT: v_mul_hi_u32 v12, v4, v12 1897; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v18, v9 1898; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 1899; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v9, v12 1900; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] 1901; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v18, v12 1902; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] 1903; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v19, v18 1904; GISEL-NEXT: v_mul_lo_u32 v19, v13, v16 1905; GISEL-NEXT: v_mul_hi_u32 v13, v13, v16 1906; GISEL-NEXT: v_mul_hi_u32 v16, v5, v16 1907; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v19, v11 1908; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 1909; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 1910; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] 1911; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v19, v16 1912; GISEL-NEXT: v_mov_b32_e32 v19, s14 1913; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15 1914; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] 1915; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v18 1916; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] 1917; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v12, v15 1918; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v18 1919; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 1920; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v13, v15 1921; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc 1922; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v12, s[4:5] 1923; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 1924; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc 1925; GISEL-NEXT: v_mul_lo_u32 v9, v3, v4 1926; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 1927; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 1928; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11 1929; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc 1930; GISEL-NEXT: v_mul_lo_u32 v11, v1, v5 1931; GISEL-NEXT: v_mul_hi_u32 v12, v0, 
v5 1932; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 1933; GISEL-NEXT: v_mul_lo_u32 v13, v2, v6 1934; GISEL-NEXT: v_mul_lo_u32 v15, v3, v6 1935; GISEL-NEXT: v_mul_hi_u32 v16, v2, v6 1936; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 1937; GISEL-NEXT: v_mul_lo_u32 v18, v0, v7 1938; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18 1939; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1940; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1941; GISEL-NEXT: v_mul_lo_u32 v11, v1, v7 1942; GISEL-NEXT: v_mul_hi_u32 v12, v0, v7 1943; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 1944; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 1945; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 1946; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v15, v4 1947; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 1948; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v11, v5 1949; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 1950; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v10 1951; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 1952; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v16 1953; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 1954; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1955; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12 1956; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1957; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 1958; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 1959; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v16 1960; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1961; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 1962; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1963; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13 1964; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1965; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 1966; GISEL-NEXT: v_mul_lo_u32 v10, s12, v4 1967; GISEL-NEXT: v_mul_lo_u32 v13, 0, v4 1968; GISEL-NEXT: v_mul_hi_u32 v15, s12, v4 1969; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1970; GISEL-NEXT: v_mul_lo_u32 v12, s12, v5 1971; GISEL-NEXT: v_mul_lo_u32 v16, 0, v5 1972; GISEL-NEXT: v_mul_hi_u32 v18, s12, v5 1973; GISEL-NEXT: 
v_add_i32_e32 v6, vcc, v6, v9 1974; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 1975; GISEL-NEXT: v_mul_lo_u32 v9, s12, v6 1976; GISEL-NEXT: v_mul_lo_u32 v11, s12, v7 1977; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 1978; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 1979; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v4 1980; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v6, vcc 1981; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 1982; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18 1983; GISEL-NEXT: v_add_i32_e32 v15, vcc, 1, v5 1984; GISEL-NEXT: v_addc_u32_e32 v18, vcc, 0, v7, vcc 1985; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 1986; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v9, vcc 1987; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9 1988; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2 1989; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 1990; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v12 1991; GISEL-NEXT: v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5] 1992; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v11 1993; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10 1994; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], 1, v13 1995; GISEL-NEXT: v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9] 1996; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7] 1997; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s12, v0 1998; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7] 1999; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v12 2000; GISEL-NEXT: v_cndmask_b32_e64 v9, v14, v9, s[6:7] 2001; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], 1, v15 2002; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7] 2003; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 2004; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] 2005; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s12, v2 2006; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 2007; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s12, v2 2008; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 2009; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s12, v0 2010; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 2011; GISEL-NEXT: 
v_cmp_le_u32_e32 vcc, s12, v0 2012; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 2013; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 2014; GISEL-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc 2015; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 2016; GISEL-NEXT: v_cndmask_b32_e32 v0, v17, v0, vcc 2017; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 2018; GISEL-NEXT: v_cndmask_b32_e32 v1, v13, v10, vcc 2019; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 2020; GISEL-NEXT: v_cndmask_b32_e64 v0, v15, v12, s[4:5] 2021; GISEL-NEXT: v_cndmask_b32_e32 v3, v16, v11, vcc 2022; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 2023; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc 2024; GISEL-NEXT: v_cndmask_b32_e64 v1, v18, v14, s[4:5] 2025; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9 2026; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] 2027; GISEL-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5] 2028; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc 2029; GISEL-NEXT: s_setpc_b64 s[30:31] 2030; 2031; CGP-LABEL: v_udiv_v2i64_oddk_denom: 2032; CGP: ; %bb.0: 2033; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2034; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb 2035; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 2036; CGP-NEXT: s_mov_b32 s8, 0xffed2705 2037; CGP-NEXT: s_mov_b32 s12, 0x12d8fb 2038; CGP-NEXT: s_bfe_i32 s10, -1, 0x10000 2039; CGP-NEXT: s_bfe_i32 s11, -1, 0x10000 2040; CGP-NEXT: s_bfe_i32 s13, -1, 0x10000 2041; CGP-NEXT: s_bfe_i32 s14, -1, 0x10000 2042; CGP-NEXT: v_mov_b32_e32 v6, v4 2043; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 2044; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 2045; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 2046; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 2047; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 2048; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 2049; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 2050; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 2051; CGP-NEXT: v_trunc_f32_e32 v6, v6 2052; CGP-NEXT: v_trunc_f32_e32 v7, v7 2053; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 2054; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 
2055; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 2056; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 2057; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 2058; CGP-NEXT: v_mul_lo_u32 v8, s8, v6 2059; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 2060; CGP-NEXT: v_mul_lo_u32 v9, s8, v7 2061; CGP-NEXT: v_mul_lo_u32 v10, s8, v4 2062; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 2063; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 2064; CGP-NEXT: v_mul_lo_u32 v13, s8, v5 2065; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 2066; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 2067; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 2068; CGP-NEXT: v_mul_lo_u32 v11, v6, v10 2069; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 2070; CGP-NEXT: v_mul_hi_u32 v10, v6, v10 2071; CGP-NEXT: v_add_i32_e32 v9, vcc, v14, v9 2072; CGP-NEXT: v_mul_lo_u32 v14, v7, v13 2073; CGP-NEXT: v_mul_hi_u32 v17, v5, v13 2074; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 2075; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 2076; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 2077; CGP-NEXT: v_mul_lo_u32 v12, v4, v8 2078; CGP-NEXT: v_mul_lo_u32 v15, v6, v8 2079; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 2080; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 2081; CGP-NEXT: v_mul_lo_u32 v19, v5, v9 2082; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 2083; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 2084; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 2085; CGP-NEXT: v_mul_lo_u32 v14, v7, v9 2086; CGP-NEXT: v_mul_hi_u32 v17, v5, v9 2087; CGP-NEXT: v_mul_hi_u32 v9, v7, v9 2088; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 2089; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 2090; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 2091; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 2092; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 2093; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 2094; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 2095; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 2096; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 2097; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] 2098; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 2099; CGP-NEXT: 
v_add_i32_e32 v13, vcc, v13, v17 2100; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 2101; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 2102; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v16 2103; CGP-NEXT: v_add_i32_e32 v15, vcc, v19, v18 2104; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 2105; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 2106; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2107; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 2108; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2109; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 2110; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v15 2111; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 2112; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 2113; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 2114; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc 2115; CGP-NEXT: v_mul_lo_u32 v11, s8, v4 2116; CGP-NEXT: v_mul_lo_u32 v12, -1, v4 2117; CGP-NEXT: v_mul_hi_u32 v14, s8, v4 2118; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 2119; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] 2120; CGP-NEXT: v_mul_lo_u32 v15, s8, v5 2121; CGP-NEXT: v_mul_lo_u32 v16, -1, v5 2122; CGP-NEXT: v_mul_hi_u32 v17, s8, v5 2123; CGP-NEXT: v_mul_lo_u32 v18, s8, v13 2124; CGP-NEXT: v_mul_lo_u32 v19, v13, v15 2125; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 2126; CGP-NEXT: v_mul_hi_u32 v18, v5, v15 2127; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17 2128; CGP-NEXT: v_mul_lo_u32 v17, v5, v16 2129; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 2130; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 2131; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18 2132; CGP-NEXT: v_mul_lo_u32 v17, s8, v10 2133; CGP-NEXT: v_mul_lo_u32 v18, v10, v11 2134; CGP-NEXT: v_add_i32_e64 v12, s[8:9], v12, v17 2135; CGP-NEXT: v_mul_hi_u32 v17, v4, v11 2136; CGP-NEXT: v_add_i32_e64 v12, s[8:9], v12, v14 2137; CGP-NEXT: v_mul_lo_u32 v14, v4, v12 2138; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v18, v14 2139; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 2140; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v17 2141; CGP-NEXT: 
v_mov_b32_e32 v14, s10 2142; CGP-NEXT: v_mov_b32_e32 v17, s11 2143; CGP-NEXT: v_add_i32_e64 v6, s[10:11], v6, v8 2144; CGP-NEXT: v_mov_b32_e32 v8, s13 2145; CGP-NEXT: v_add_i32_e64 v7, s[10:11], v7, v9 2146; CGP-NEXT: v_mul_hi_u32 v9, v10, v11 2147; CGP-NEXT: v_mul_hi_u32 v11, v13, v15 2148; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] 2149; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15 2150; CGP-NEXT: v_mul_lo_u32 v18, v10, v12 2151; CGP-NEXT: v_mul_hi_u32 v10, v10, v12 2152; CGP-NEXT: v_mul_hi_u32 v12, v4, v12 2153; CGP-NEXT: v_add_i32_e64 v9, s[8:9], v18, v9 2154; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 2155; CGP-NEXT: v_add_i32_e64 v9, s[8:9], v9, v12 2156; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] 2157; CGP-NEXT: v_add_i32_e64 v12, s[8:9], v18, v12 2158; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] 2159; CGP-NEXT: v_add_i32_e64 v18, s[6:7], v19, v18 2160; CGP-NEXT: v_mul_lo_u32 v19, v13, v16 2161; CGP-NEXT: v_mul_hi_u32 v13, v13, v16 2162; CGP-NEXT: v_mul_hi_u32 v16, v5, v16 2163; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v19, v11 2164; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 2165; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 2166; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] 2167; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v19, v16 2168; CGP-NEXT: v_mov_b32_e32 v19, s14 2169; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15 2170; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] 2171; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v18 2172; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] 2173; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v12, v15 2174; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v18 2175; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 2176; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v13, v15 2177; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc 2178; CGP-NEXT: v_addc_u32_e64 v7, vcc, v7, v12, s[4:5] 2179; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 2180; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc 2181; CGP-NEXT: v_mul_lo_u32 v9, v3, v4 2182; CGP-NEXT: v_mul_hi_u32 v10, 
v2, v4 2183; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 2184; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11 2185; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc 2186; CGP-NEXT: v_mul_lo_u32 v11, v1, v5 2187; CGP-NEXT: v_mul_hi_u32 v12, v0, v5 2188; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 2189; CGP-NEXT: v_mul_lo_u32 v13, v2, v6 2190; CGP-NEXT: v_mul_lo_u32 v15, v3, v6 2191; CGP-NEXT: v_mul_hi_u32 v16, v2, v6 2192; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 2193; CGP-NEXT: v_mul_lo_u32 v18, v0, v7 2194; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v18 2195; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 2196; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 2197; CGP-NEXT: v_mul_lo_u32 v11, v1, v7 2198; CGP-NEXT: v_mul_hi_u32 v12, v0, v7 2199; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 2200; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 2201; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 2202; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v15, v4 2203; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 2204; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v11, v5 2205; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 2206; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v9, v10 2207; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 2208; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v16 2209; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 2210; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 2211; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 2212; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2213; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 2214; CGP-NEXT: v_add_i32_e32 v10, vcc, v15, v10 2215; CGP-NEXT: v_add_i32_e32 v13, vcc, v18, v16 2216; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 2217; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 2218; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2219; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13 2220; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2221; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 2222; CGP-NEXT: v_mul_lo_u32 v10, s12, v4 2223; CGP-NEXT: v_mul_lo_u32 v13, 0, v4 2224; CGP-NEXT: v_mul_hi_u32 v15, s12, v4 2225; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 
2226; CGP-NEXT: v_mul_lo_u32 v12, s12, v5 2227; CGP-NEXT: v_mul_lo_u32 v16, 0, v5 2228; CGP-NEXT: v_mul_hi_u32 v18, s12, v5 2229; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9 2230; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 2231; CGP-NEXT: v_mul_lo_u32 v9, s12, v6 2232; CGP-NEXT: v_mul_lo_u32 v11, s12, v7 2233; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 2234; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 2235; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v4 2236; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v6, vcc 2237; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 2238; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v18 2239; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v5 2240; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v7, vcc 2241; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 2242; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v9, vcc 2243; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9 2244; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2 2245; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 2246; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v12 2247; CGP-NEXT: v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5] 2248; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v11 2249; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10 2250; CGP-NEXT: v_add_i32_e64 v10, s[8:9], 1, v13 2251; CGP-NEXT: v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9] 2252; CGP-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7] 2253; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s12, v0 2254; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7] 2255; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v12 2256; CGP-NEXT: v_cndmask_b32_e64 v9, v14, v9, s[6:7] 2257; CGP-NEXT: v_add_i32_e64 v12, s[6:7], 1, v15 2258; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7] 2259; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 2260; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] 2261; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s12, v2 2262; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 2263; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v2 2264; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 2265; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s12, v0 2266; CGP-NEXT: 
v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 2267; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 2268; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 2269; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 2270; CGP-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc 2271; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 2272; CGP-NEXT: v_cndmask_b32_e32 v0, v17, v0, vcc 2273; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 2274; CGP-NEXT: v_cndmask_b32_e32 v1, v13, v10, vcc 2275; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 2276; CGP-NEXT: v_cndmask_b32_e64 v0, v15, v12, s[4:5] 2277; CGP-NEXT: v_cndmask_b32_e32 v3, v16, v11, vcc 2278; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 2279; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc 2280; CGP-NEXT: v_cndmask_b32_e64 v1, v18, v14, s[4:5] 2281; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9 2282; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] 2283; CGP-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5] 2284; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc 2285; CGP-NEXT: s_setpc_b64 s[30:31] 2286 %result = udiv <2 x i64> %num, <i64 1235195, i64 1235195> 2287 ret <2 x i64> %result 2288} 2289 2290define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) { 2291; CHECK-LABEL: v_udiv_i64_pow2_shl_denom: 2292; CHECK: ; %bb.0: 2293; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2294; CHECK-NEXT: s_mov_b64 s[4:5], 0x1000 2295; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2 2296; CHECK-NEXT: v_or_b32_e32 v3, v1, v5 2297; CHECK-NEXT: v_mov_b32_e32 v2, 0 2298; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 2299; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 2300; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc 2301; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 2302; CHECK-NEXT: s_cbranch_execz BB7_2 2303; CHECK-NEXT: ; %bb.1: 2304; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v4 2305; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v5 2306; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 2307; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v5, vcc 2308; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 2309; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 2310; CHECK-NEXT: v_mul_f32_e32 v2, 
0x5f7ffffc, v2 2311; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 2312; CHECK-NEXT: v_trunc_f32_e32 v3, v3 2313; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 2314; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 2315; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 2316; CHECK-NEXT: v_mul_lo_u32 v8, v6, v3 2317; CHECK-NEXT: v_mul_lo_u32 v9, v6, v2 2318; CHECK-NEXT: v_mul_lo_u32 v10, v7, v2 2319; CHECK-NEXT: v_mul_hi_u32 v11, v6, v2 2320; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 2321; CHECK-NEXT: v_mul_lo_u32 v10, v3, v9 2322; CHECK-NEXT: v_mul_hi_u32 v12, v2, v9 2323; CHECK-NEXT: v_mul_hi_u32 v9, v3, v9 2324; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 2325; CHECK-NEXT: v_mul_lo_u32 v11, v2, v8 2326; CHECK-NEXT: v_mul_lo_u32 v13, v3, v8 2327; CHECK-NEXT: v_mul_hi_u32 v14, v2, v8 2328; CHECK-NEXT: v_mul_hi_u32 v8, v3, v8 2329; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 2330; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2331; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 2332; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2333; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 2334; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2335; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 2336; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2337; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 2338; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 2339; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 2340; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2341; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 2342; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 2343; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 2344; CHECK-NEXT: v_addc_u32_e64 v9, s[4:5], v3, v8, vcc 2345; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v8 2346; CHECK-NEXT: v_mul_lo_u32 v8, v6, v2 2347; CHECK-NEXT: v_mul_lo_u32 v7, v7, v2 2348; CHECK-NEXT: v_mul_hi_u32 v10, v6, v2 2349; CHECK-NEXT: v_mul_lo_u32 v6, v6, v9 2350; CHECK-NEXT: v_mul_lo_u32 v11, v9, v8 2351; CHECK-NEXT: v_mul_hi_u32 v12, v2, v8 2352; CHECK-NEXT: v_mul_hi_u32 v8, v9, v8 2353; CHECK-NEXT: 
v_add_i32_e64 v6, s[4:5], v7, v6 2354; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 2355; CHECK-NEXT: v_mul_lo_u32 v7, v2, v6 2356; CHECK-NEXT: v_mul_lo_u32 v10, v9, v6 2357; CHECK-NEXT: v_mul_hi_u32 v13, v2, v6 2358; CHECK-NEXT: v_mul_hi_u32 v6, v9, v6 2359; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 2360; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 2361; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v10, v8 2362; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 2363; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v12 2364; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] 2365; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v13 2366; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 2367; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v7 2368; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v11 2369; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 2370; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 2371; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 2372; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 2373; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc 2374; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 2375; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2376; CHECK-NEXT: v_mul_lo_u32 v6, v1, v2 2377; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 2378; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 2379; CHECK-NEXT: v_mul_lo_u32 v8, v0, v3 2380; CHECK-NEXT: v_mul_lo_u32 v9, v1, v3 2381; CHECK-NEXT: v_mul_hi_u32 v10, v0, v3 2382; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 2383; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 2384; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2385; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 2386; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2387; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 2388; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2389; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 2390; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 2391; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 2392; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 2393; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 2394; CHECK-NEXT: 
v_cndmask_b32_e64 v6, 0, 1, vcc 2395; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 2396; CHECK-NEXT: v_mul_lo_u32 v7, v4, v2 2397; CHECK-NEXT: v_mul_lo_u32 v8, v5, v2 2398; CHECK-NEXT: v_mul_hi_u32 v9, v4, v2 2399; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 2400; CHECK-NEXT: v_mul_lo_u32 v6, v4, v3 2401; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v2 2402; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc 2403; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 2404; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10 2405; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc 2406; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 2407; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 2408; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc 2409; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 2410; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 2411; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 2412; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v5 2413; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 2414; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc 2415; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v5 2416; CHECK-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc 2417; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 2418; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 2419; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 2420; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 2421; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 2422; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc 2423; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5 2424; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 2425; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 2426; CHECK-NEXT: v_cndmask_b32_e32 v0, v10, v8, vcc 2427; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v12, vcc 2428; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 2429; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc 2430; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc 2431; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5 2432; CHECK-NEXT: ; implicit-def: $vgpr0 2433; CHECK-NEXT: BB7_2: ; %Flow 2434; CHECK-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] 2435; 
CHECK-NEXT: s_xor_b64 exec, exec, s[6:7] 2436; CHECK-NEXT: s_cbranch_execz BB7_4 2437; CHECK-NEXT: ; %bb.3: 2438; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v4 2439; CHECK-NEXT: v_sub_i32_e32 v2, vcc, 0, v4 2440; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 2441; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 2442; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 2443; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1 2444; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 2445; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 2446; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 2447; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4 2448; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1 2449; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 2450; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 2451; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2452; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v0, v4 2453; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2454; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v1 2455; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 2456; CHECK-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 2457; CHECK-NEXT: v_mov_b32_e32 v3, 0 2458; CHECK-NEXT: BB7_4: 2459; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] 2460; CHECK-NEXT: v_mov_b32_e32 v0, v2 2461; CHECK-NEXT: v_mov_b32_e32 v1, v3 2462; CHECK-NEXT: s_setpc_b64 s[30:31] 2463 %shl.y = shl i64 4096, %y 2464 %r = udiv i64 %x, %shl.y 2465 ret i64 %r 2466} 2467 2468define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { 2469; GISEL-LABEL: v_udiv_v2i64_pow2_shl_denom: 2470; GISEL: ; %bb.0: 2471; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2472; GISEL-NEXT: s_mov_b64 s[4:5], 0x1000 2473; GISEL-NEXT: v_lshl_b64 v[4:5], s[4:5], v4 2474; GISEL-NEXT: v_lshl_b64 v[6:7], s[4:5], v6 2475; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 2476; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5 2477; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 2478; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 2479; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 2480; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 2481; GISEL-NEXT: v_trunc_f32_e32 v9, v9 2482; GISEL-NEXT: 
v_mac_f32_e32 v8, 0xcf800000, v9 2483; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 2484; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 2485; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 2486; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc 2487; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 2488; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8 2489; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 2490; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 2491; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 2492; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 2493; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 2494; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13 2495; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12 2496; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 2497; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2498; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 2499; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2500; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 2501; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 2502; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 2503; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 2504; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 2505; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2506; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 2507; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 2508; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 2509; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 2510; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2511; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 2512; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 2513; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 2514; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 2515; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v13, vcc 2516; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 2517; GISEL-NEXT: v_mul_lo_u32 v13, v10, v8 2518; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8 2519; GISEL-NEXT: v_mul_lo_u32 v14, v10, v12 2520; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8 2521; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 2522; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 2523; GISEL-NEXT: v_mul_lo_u32 
v11, v12, v13 2524; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 2525; GISEL-NEXT: v_mul_hi_u32 v15, v8, v13 2526; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 2527; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 2528; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 2529; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 2530; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 2531; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 2532; GISEL-NEXT: v_mul_hi_u32 v13, v12, v13 2533; GISEL-NEXT: v_mul_hi_u32 v15, v8, v10 2534; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 2535; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 2536; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 2537; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 2538; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 2539; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 2540; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 2541; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 2542; GISEL-NEXT: v_mul_hi_u32 v10, v12, v10 2543; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 2544; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 2545; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc 2546; GISEL-NEXT: v_addc_u32_e64 v9, vcc, 0, v9, s[4:5] 2547; GISEL-NEXT: v_mul_lo_u32 v10, v1, v8 2548; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9 2549; GISEL-NEXT: v_mul_hi_u32 v12, v0, v8 2550; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 2551; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2552; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 2553; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2554; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 2555; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 2556; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 2557; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 2558; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 2559; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2560; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 2561; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2562; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 2563; GISEL-NEXT: v_add_i32_e32 v8, 
vcc, v8, v10 2564; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2565; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 2566; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 2567; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 2568; GISEL-NEXT: v_mul_lo_u32 v10, v4, v8 2569; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8 2570; GISEL-NEXT: v_mul_lo_u32 v12, v4, v9 2571; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 2572; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 2573; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 2574; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 2575; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v11, vcc 2576; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v11 2577; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 2578; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] 2579; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 2580; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] 2581; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v5 2582; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, v12, s[4:5] 2583; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v4 2584; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc 2585; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] 2586; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v8 2587; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc 2588; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 2589; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc 2590; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 2591; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 2592; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5 2593; GISEL-NEXT: v_cndmask_b32_e32 v0, v13, v0, vcc 2594; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v11 2595; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v12, vcc 2596; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 2597; GISEL-NEXT: v_cndmask_b32_e32 v0, v11, v1, vcc 2598; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc 2599; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 2600; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc 2601; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc 2602; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v6 2603; GISEL-NEXT: 
v_cvt_f32_u32_e32 v5, v7 2604; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 2605; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 2606; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 2607; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 2608; GISEL-NEXT: v_trunc_f32_e32 v5, v5 2609; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 2610; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 2611; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 2612; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 2613; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc 2614; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 2615; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 2616; GISEL-NEXT: v_mul_lo_u32 v12, v8, v5 2617; GISEL-NEXT: v_mul_hi_u32 v13, v8, v4 2618; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 2619; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 2620; GISEL-NEXT: v_mul_lo_u32 v12, v5, v10 2621; GISEL-NEXT: v_mul_lo_u32 v13, v4, v11 2622; GISEL-NEXT: v_mul_hi_u32 v14, v4, v10 2623; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 2624; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2625; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 2626; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2627; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 2628; GISEL-NEXT: v_mul_lo_u32 v13, v5, v11 2629; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10 2630; GISEL-NEXT: v_mul_hi_u32 v14, v4, v11 2631; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 2632; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2633; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 2634; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2635; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 2636; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 2637; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2638; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 2639; GISEL-NEXT: v_mul_hi_u32 v11, v5, v11 2640; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 2641; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 2642; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v5, v11, vcc 2643; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v11 2644; GISEL-NEXT: v_mul_lo_u32 v11, v8, v4 
2645; GISEL-NEXT: v_mul_lo_u32 v9, v9, v4 2646; GISEL-NEXT: v_mul_lo_u32 v12, v8, v10 2647; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 2648; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 2649; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 2650; GISEL-NEXT: v_mul_lo_u32 v9, v10, v11 2651; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 2652; GISEL-NEXT: v_mul_hi_u32 v13, v4, v11 2653; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 2654; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 2655; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 2656; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 2657; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 2658; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 2659; GISEL-NEXT: v_mul_hi_u32 v11, v10, v11 2660; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 2661; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 2662; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 2663; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 2664; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 2665; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13 2666; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 2667; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 2668; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 2669; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 2670; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 2671; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v9 2672; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc 2673; GISEL-NEXT: v_addc_u32_e64 v5, vcc, 0, v5, s[4:5] 2674; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 2675; GISEL-NEXT: v_mul_lo_u32 v9, v2, v5 2676; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 2677; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 2678; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2679; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 2680; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2681; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 2682; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5 2683; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 2684; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 2685; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4 2686; 
GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2687; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 2688; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2689; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 2690; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 2691; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2692; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 2693; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 2694; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 2695; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4 2696; GISEL-NEXT: v_mul_lo_u32 v9, v7, v4 2697; GISEL-NEXT: v_mul_lo_u32 v10, v6, v5 2698; GISEL-NEXT: v_mul_hi_u32 v11, v6, v4 2699; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 2700; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 2701; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 2702; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v9, vcc 2703; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9 2704; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v7 2705; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 2706; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 2707; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 2708; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v7 2709; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v10, s[4:5] 2710; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v6 2711; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc 2712; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] 2713; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 2714; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v5, vcc 2715; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v7 2716; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 2717; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 2718; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 2719; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 2720; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc 2721; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v9 2722; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v10, vcc 2723; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 2724; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc 2725; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v6, vcc 2726; GISEL-NEXT: 
v_cmp_ne_u32_e32 vcc, 0, v8 2727; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 2728; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc 2729; GISEL-NEXT: s_setpc_b64 s[30:31] 2730; 2731; CGP-LABEL: v_udiv_v2i64_pow2_shl_denom: 2732; CGP: ; %bb.0: 2733; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2734; CGP-NEXT: v_mov_b32_e32 v5, v0 2735; CGP-NEXT: v_mov_b32_e32 v7, v1 2736; CGP-NEXT: s_mov_b64 s[4:5], 0x1000 2737; CGP-NEXT: v_lshl_b64 v[10:11], s[4:5], v4 2738; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6 2739; CGP-NEXT: v_or_b32_e32 v1, v7, v11 2740; CGP-NEXT: v_mov_b32_e32 v0, 0 2741; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 2742; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 2743; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 2744; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 2745; CGP-NEXT: s_cbranch_execz BB8_2 2746; CGP-NEXT: ; %bb.1: 2747; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10 2748; CGP-NEXT: v_cvt_f32_u32_e32 v1, v11 2749; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v10 2750; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v11, vcc 2751; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 2752; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 2753; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 2754; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 2755; CGP-NEXT: v_trunc_f32_e32 v1, v1 2756; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 2757; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1 2758; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 2759; CGP-NEXT: v_mul_lo_u32 v12, v4, v1 2760; CGP-NEXT: v_mul_lo_u32 v13, v4, v0 2761; CGP-NEXT: v_mul_lo_u32 v14, v6, v0 2762; CGP-NEXT: v_mul_hi_u32 v15, v4, v0 2763; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 2764; CGP-NEXT: v_mul_lo_u32 v14, v1, v13 2765; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 2766; CGP-NEXT: v_mul_hi_u32 v13, v1, v13 2767; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 2768; CGP-NEXT: v_mul_lo_u32 v15, v0, v12 2769; CGP-NEXT: v_mul_lo_u32 v17, v1, v12 2770; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 2771; CGP-NEXT: v_mul_hi_u32 v12, v1, v12 2772; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 2773; CGP-NEXT: 
v_cndmask_b32_e64 v15, 0, 1, vcc 2774; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 2775; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 2776; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 2777; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2778; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 2779; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 2780; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 2781; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 2782; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 2783; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2784; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 2785; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 2786; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 2787; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], v1, v12, vcc 2788; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v1, v12 2789; CGP-NEXT: v_mul_lo_u32 v12, v4, v0 2790; CGP-NEXT: v_mul_lo_u32 v6, v6, v0 2791; CGP-NEXT: v_mul_hi_u32 v14, v4, v0 2792; CGP-NEXT: v_mul_lo_u32 v4, v4, v13 2793; CGP-NEXT: v_mul_lo_u32 v15, v13, v12 2794; CGP-NEXT: v_mul_hi_u32 v16, v0, v12 2795; CGP-NEXT: v_mul_hi_u32 v12, v13, v12 2796; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v6, v4 2797; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v14 2798; CGP-NEXT: v_mul_lo_u32 v6, v0, v4 2799; CGP-NEXT: v_mul_lo_u32 v14, v13, v4 2800; CGP-NEXT: v_mul_hi_u32 v17, v0, v4 2801; CGP-NEXT: v_mul_hi_u32 v4, v13, v4 2802; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v15, v6 2803; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 2804; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 2805; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 2806; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v16 2807; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 2808; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 2809; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 2810; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v13, v6 2811; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v15 2812; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v12, v6 2813; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 2814; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 
2815; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v12 2816; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc 2817; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 2818; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2819; CGP-NEXT: v_mul_lo_u32 v4, v7, v0 2820; CGP-NEXT: v_mul_hi_u32 v6, v5, v0 2821; CGP-NEXT: v_mul_hi_u32 v0, v7, v0 2822; CGP-NEXT: v_mul_lo_u32 v12, v5, v1 2823; CGP-NEXT: v_mul_lo_u32 v13, v7, v1 2824; CGP-NEXT: v_mul_hi_u32 v14, v5, v1 2825; CGP-NEXT: v_mul_hi_u32 v1, v7, v1 2826; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 2827; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2828; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 2829; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2830; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 2831; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2832; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 2833; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2834; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 2835; CGP-NEXT: v_add_i32_e32 v6, vcc, v13, v6 2836; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 2837; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2838; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 2839; CGP-NEXT: v_mul_lo_u32 v6, v10, v0 2840; CGP-NEXT: v_mul_lo_u32 v12, v11, v0 2841; CGP-NEXT: v_mul_hi_u32 v13, v10, v0 2842; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 2843; CGP-NEXT: v_mul_lo_u32 v4, v10, v1 2844; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v0 2845; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v1, vcc 2846; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 2847; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v14 2848; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc 2849; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 2850; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v6 2851; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v7, v4, vcc 2852; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v7, v4 2853; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v10 2854; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] 2855; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v11 2856; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] 2857; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, 
v11, vcc 2858; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v11 2859; CGP-NEXT: v_cndmask_b32_e32 v6, v13, v7, vcc 2860; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v10 2861; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc 2862; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v10 2863; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 2864; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v11 2865; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc 2866; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v11 2867; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v5, vcc 2868; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 2869; CGP-NEXT: v_cndmask_b32_e32 v4, v14, v12, vcc 2870; CGP-NEXT: v_cndmask_b32_e32 v5, v15, v16, vcc 2871; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 2872; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2873; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2874; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11 2875; CGP-NEXT: ; implicit-def: $vgpr5 2876; CGP-NEXT: BB8_2: ; %Flow2 2877; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] 2878; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] 2879; CGP-NEXT: s_cbranch_execz BB8_4 2880; CGP-NEXT: ; %bb.3: 2881; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10 2882; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v10 2883; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 2884; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2885; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 2886; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 2887; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 2888; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 2889; CGP-NEXT: v_mul_hi_u32 v0, v5, v0 2890; CGP-NEXT: v_mul_lo_u32 v1, v0, v10 2891; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v0 2892; CGP-NEXT: v_sub_i32_e32 v1, vcc, v5, v1 2893; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 2894; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2895; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v1, v10 2896; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2897; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v0 2898; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 2899; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2900; CGP-NEXT: v_mov_b32_e32 v1, 0 2901; CGP-NEXT: BB8_4: 2902; CGP-NEXT: s_or_b64 
exec, exec, s[6:7] 2903; CGP-NEXT: v_or_b32_e32 v5, v3, v9 2904; CGP-NEXT: v_mov_b32_e32 v4, 0 2905; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 2906; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5 2907; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 2908; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 2909; CGP-NEXT: s_cbranch_execz BB8_6 2910; CGP-NEXT: ; %bb.5: 2911; CGP-NEXT: v_cvt_f32_u32_e32 v4, v8 2912; CGP-NEXT: v_cvt_f32_u32_e32 v5, v9 2913; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v8 2914; CGP-NEXT: v_subb_u32_e32 v7, vcc, 0, v9, vcc 2915; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 2916; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 2917; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 2918; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 2919; CGP-NEXT: v_trunc_f32_e32 v5, v5 2920; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 2921; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 2922; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 2923; CGP-NEXT: v_mul_lo_u32 v10, v6, v5 2924; CGP-NEXT: v_mul_lo_u32 v11, v6, v4 2925; CGP-NEXT: v_mul_lo_u32 v12, v7, v4 2926; CGP-NEXT: v_mul_hi_u32 v13, v6, v4 2927; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 2928; CGP-NEXT: v_mul_lo_u32 v12, v5, v11 2929; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 2930; CGP-NEXT: v_mul_hi_u32 v11, v5, v11 2931; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 2932; CGP-NEXT: v_mul_lo_u32 v13, v4, v10 2933; CGP-NEXT: v_mul_lo_u32 v15, v5, v10 2934; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 2935; CGP-NEXT: v_mul_hi_u32 v10, v5, v10 2936; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 2937; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2938; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 2939; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2940; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 2941; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2942; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 2943; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2944; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 2945; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 2946; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 2947; CGP-NEXT: 
v_cndmask_b32_e64 v12, 0, 1, vcc 2948; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 2949; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 2950; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 2951; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v5, v10, vcc 2952; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v10 2953; CGP-NEXT: v_mul_lo_u32 v10, v6, v4 2954; CGP-NEXT: v_mul_lo_u32 v7, v7, v4 2955; CGP-NEXT: v_mul_hi_u32 v12, v6, v4 2956; CGP-NEXT: v_mul_lo_u32 v6, v6, v11 2957; CGP-NEXT: v_mul_lo_u32 v13, v11, v10 2958; CGP-NEXT: v_mul_hi_u32 v14, v4, v10 2959; CGP-NEXT: v_mul_hi_u32 v10, v11, v10 2960; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 2961; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v12 2962; CGP-NEXT: v_mul_lo_u32 v7, v4, v6 2963; CGP-NEXT: v_mul_lo_u32 v12, v11, v6 2964; CGP-NEXT: v_mul_hi_u32 v15, v4, v6 2965; CGP-NEXT: v_mul_hi_u32 v6, v11, v6 2966; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v13, v7 2967; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 2968; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 2969; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 2970; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v14 2971; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] 2972; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 2973; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 2974; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 2975; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v13 2976; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v10, v7 2977; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 2978; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 2979; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 2980; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc 2981; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v7 2982; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2983; CGP-NEXT: v_mul_lo_u32 v6, v3, v4 2984; CGP-NEXT: v_mul_hi_u32 v7, v2, v4 2985; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 2986; CGP-NEXT: v_mul_lo_u32 v10, v2, v5 2987; CGP-NEXT: v_mul_lo_u32 v11, v3, v5 2988; CGP-NEXT: v_mul_hi_u32 v12, v2, v5 2989; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 2990; 
CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 2991; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2992; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 2993; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2994; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 2995; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2996; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 2997; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 2998; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 2999; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 3000; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 3001; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 3002; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 3003; CGP-NEXT: v_mul_lo_u32 v7, v8, v4 3004; CGP-NEXT: v_mul_lo_u32 v10, v9, v4 3005; CGP-NEXT: v_mul_hi_u32 v11, v8, v4 3006; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 3007; CGP-NEXT: v_mul_lo_u32 v6, v8, v5 3008; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v4 3009; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v5, vcc 3010; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 3011; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v12 3012; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc 3013; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 3014; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 3015; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v3, v6, vcc 3016; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v6 3017; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v8 3018; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 3019; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v9 3020; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] 3021; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc 3022; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v7, v9 3023; CGP-NEXT: v_cndmask_b32_e32 v6, v11, v6, vcc 3024; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 3025; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 3026; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 3027; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 3028; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9 3029; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc 3030; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v9 3031; CGP-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc 3032; 
CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; CGP-NEXT: v_cndmask_b32_e32 v2, v12, v10, vcc
; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v14, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc
; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v3, vcc
; CGP-NEXT: ; implicit-def: $vgpr8_vgpr9
; CGP-NEXT: ; implicit-def: $vgpr2
; CGP-NEXT: BB8_6: ; %Flow
; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
; CGP-NEXT: s_cbranch_execz BB8_8
; CGP-NEXT: ; %bb.7:
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8
; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v8
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_mul_lo_u32 v4, v3, v8
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v8
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8
; CGP-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
; CGP-NEXT: v_mov_b32_e32 v5, 0
; CGP-NEXT: BB8_8:
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; CGP-NEXT: v_mov_b32_e32 v2, v4
; CGP-NEXT: v_mov_b32_e32 v3, v5
; CGP-NEXT: s_setpc_b64 s[30:31]
  %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
  %r = udiv <2 x i64> %x, %shl.y
  ret <2 x i64> %r
}

; Scalar i64 udiv where both operands are first masked to their low 24 bits
; (and with 16777215, i.e. 0xffffff).
;
; The assertions inside the function are autogenerated (see the note on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
;
; What the two runs currently assert:
;  * the GISEL run (-amdgpu-codegenprepare-disable-idiv-expansion=1) expects an
;    integer-reciprocal sequence (v_rcp_iflag_f32, v_mul_hi_u32) followed by
;    two compare-and-select correction steps, with the high result word set
;    to 0;
;  * the CGP run (-amdgpu-codegenprepare-disable-idiv-expansion=0) expects a
;    float sequence (v_rcp_f32, v_trunc_f32, v_mad_f32), a single
;    compare-and-add correction, and a final 0xffffff mask of the quotient.
define i64 @v_udiv_i64_24bit(i64 %num, i64 %den) {
; GISEL-LABEL: v_udiv_i64_24bit:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0xffffff
; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
; GISEL-NEXT: v_and_b32_e32 v1, s4, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_i64_24bit:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b32 s4, 0xffffff
; CGP-NEXT: v_and_b32_e32 v0, s4, v0
; CGP-NEXT: v_and_b32_e32 v1, s4, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v0
; CGP-NEXT: v_cvt_f32_u32_e32 v1, v1
; CGP-NEXT: v_rcp_f32_e32 v2, v1
; CGP-NEXT: v_mul_f32_e32 v2, v0, v2
; CGP-NEXT: v_trunc_f32_e32 v2, v2
; CGP-NEXT: v_mad_f32 v0, -v2, v1, v0
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v1
; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: s_setpc_b64 s[30:31]
  ; 16777215 == 0xffffff: only the low 24 bits of each operand survive.
  %num.mask = and i64 %num, 16777215
  %den.mask = and i64 %den, 16777215
  %result = udiv i64 %num.mask, %den.mask
  ret i64 %result
}

; <2 x i64> variant of the 24-bit test: every lane of both operands is masked
; with 16777215 (0xffffff) before the vector udiv.
;
; The assertions inside the function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
;
; What the two runs currently assert:
;  * the CGP run expects the short float sequence (v_rcp_f32, v_trunc_f32,
;    v_mad_f32, compare-and-add) once per lane, then masks each quotient with
;    0xffffff and writes 0 to the high words (v1, v3);
;  * the GISEL run expects a much longer sequence that carries literal 0
;    operands (e.g. "v_mul_lo_u32 v8, 0, v1", "v_subb_u32_e64 ..., 0, 0, vcc").
;    NOTE(review): this looks like the full 64-bit expansion applied to values
;    whose high halves are known zero, i.e. the 24-bit shortcut is not taken
;    on this path - confirm against the legalizer before relying on it.
define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; GISEL-LABEL: v_udiv_v2i64_24bit:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s6, 0xffffff
; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v1, 0
; GISEL-NEXT: v_and_b32_e32 v3, s6, v4
; GISEL-NEXT: v_and_b32_e32 v4, s6, v6
; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3
; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v3
; GISEL-NEXT: v_subb_u32_e64 v7, s[4:5], 0, 0, vcc
; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4
; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v4
; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v1
; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v5
; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v8
; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v1
; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v5
; GISEL-NEXT: v_trunc_f32_e32 v8, v8
; GISEL-NEXT: v_trunc_f32_e32 v11, v11
; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v8
; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v11
; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11
; GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1
; GISEL-NEXT: v_mul_lo_u32 v12, v6, v8
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11
; GISEL-NEXT: v_mul_lo_u32 v14, v6, v1
; GISEL-NEXT: v_mul_lo_u32 v15, v7, v1
; GISEL-NEXT: v_mul_hi_u32 v16, v6, v1
; GISEL-NEXT: v_mul_lo_u32 v17, v9, v5
; GISEL-NEXT: v_mul_lo_u32 v18, v10, v5
; GISEL-NEXT: v_mul_hi_u32 v19, v9, v5
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13
; GISEL-NEXT: v_mul_lo_u32 v15, v11, v17
; GISEL-NEXT: v_mul_hi_u32 v18, v5, v17
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19
; GISEL-NEXT: v_mul_lo_u32 v19, v5, v13
; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v18
; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14
; GISEL-NEXT: v_mul_hi_u32 v18, v1, v14
; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14
; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17
; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v16
; GISEL-NEXT: v_mul_lo_u32 v16, v1, v12
; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v16
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18
; GISEL-NEXT: v_mul_lo_u32 v15, v8, v12
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18
; GISEL-NEXT: v_mul_hi_u32 v18, v1, v12
; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v18
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18
; GISEL-NEXT: v_mul_lo_u32 v19, v11, v13
; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17
; GISEL-NEXT: v_mul_hi_u32 v19, v5, v13
; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v19, vcc, v20, v19
; GISEL-NEXT: s_bfe_i32 s10, -1, 0x10000
; GISEL-NEXT: s_bfe_i32 s11, -1, 0x10000
; GISEL-NEXT: s_bfe_i32 s12, -1, 0x10000
; GISEL-NEXT: s_bfe_i32 s13, -1, 0x10000
; GISEL-NEXT: v_and_b32_e32 v0, s6, v0
; GISEL-NEXT: v_and_b32_e32 v2, s6, v2
; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16
; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14
; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v8, v12, vcc
; GISEL-NEXT: v_mul_lo_u32 v15, v6, v1
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v1
; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v17
; GISEL-NEXT: v_addc_u32_e64 v16, s[6:7], v11, v13, s[4:5]
; GISEL-NEXT: v_mul_lo_u32 v17, v9, v5
; GISEL-NEXT: v_mul_lo_u32 v10, v10, v5
; GISEL-NEXT: v_mul_hi_u32 v18, v9, v5
; GISEL-NEXT: v_mul_lo_u32 v9, v9, v16
; GISEL-NEXT: v_mul_lo_u32 v19, v16, v17
; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v10, v9
; GISEL-NEXT: v_mul_hi_u32 v10, v5, v17
; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v18
; GISEL-NEXT: v_mul_lo_u32 v18, v5, v9
; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v19, v18
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v18, v10
; GISEL-NEXT: v_mul_hi_u32 v10, v6, v1
; GISEL-NEXT: v_mul_lo_u32 v6, v6, v14
; GISEL-NEXT: v_mul_lo_u32 v18, v14, v15
; GISEL-NEXT: v_add_i32_e64 v6, s[8:9], v7, v6
; GISEL-NEXT: v_mul_hi_u32 v7, v1, v15
; GISEL-NEXT: v_add_i32_e64 v6, s[8:9], v6, v10
; GISEL-NEXT: v_mul_lo_u32 v10, v1, v6
; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v18, v10
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9]
; GISEL-NEXT: v_add_i32_e64 v7, s[8:9], v10, v7
; GISEL-NEXT: v_mov_b32_e32 v7, s10
; GISEL-NEXT: v_mov_b32_e32 v10, s11
; GISEL-NEXT: v_add_i32_e64 v8, s[10:11], v8, v12
; GISEL-NEXT: v_mov_b32_e32 v12, s12
; GISEL-NEXT: v_add_i32_e64 v11, s[10:11], v11, v13
; GISEL-NEXT: v_mul_hi_u32 v13, v14, v15
; GISEL-NEXT: v_mul_hi_u32 v15, v16, v17
; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9]
; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17
; GISEL-NEXT: v_mul_lo_u32 v18, v14, v6
; GISEL-NEXT: v_mul_hi_u32 v14, v14, v6
; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6
; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v18, v13
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9]
; GISEL-NEXT: v_add_i32_e64 v6, s[8:9], v13, v6
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[8:9]
; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v18, v13
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v19, v18
; GISEL-NEXT: v_mul_lo_u32 v19, v16, v9
; GISEL-NEXT: v_mul_hi_u32 v16, v16, v9
; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9
; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v19, v15
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v15, v9
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v19, v15
; GISEL-NEXT: v_mov_b32_e32 v19, s13
; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v17
; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v18
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v13, v17
; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18
; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v14, v13
; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v16, v15
; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v13, vcc
; GISEL-NEXT: v_addc_u32_e64 v11, vcc, v11, v14, s[4:5]
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v8, vcc
; GISEL-NEXT: v_mul_lo_u32 v8, 0, v1
; GISEL-NEXT: v_mul_hi_u32 v13, v0, v1
; GISEL-NEXT: v_mul_hi_u32 v1, 0, v1
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc
; GISEL-NEXT: v_mul_lo_u32 v11, 0, v5
; GISEL-NEXT: v_mul_hi_u32 v14, v2, v5
; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5
; GISEL-NEXT: v_mul_lo_u32 v15, v0, v6
; GISEL-NEXT: v_mul_lo_u32 v16, 0, v6
; GISEL-NEXT: v_mul_hi_u32 v17, v0, v6
; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6
; GISEL-NEXT: v_mul_lo_u32 v18, v2, v9
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
; GISEL-NEXT: v_mul_lo_u32 v11, 0, v9
; GISEL-NEXT: v_mul_hi_u32 v14, v2, v9
; GISEL-NEXT: v_mul_hi_u32 v9, 0, v9
; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v15
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v16, v1
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v11, v5
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v13
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v1, v17
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v14
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v15, v8
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13
; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v17
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8
; GISEL-NEXT: v_mul_lo_u32 v13, v3, v1
; GISEL-NEXT: v_mul_lo_u32 v15, 0, v1
; GISEL-NEXT: v_mul_hi_u32 v16, v3, v1
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
; GISEL-NEXT: v_mul_lo_u32 v14, v4, v5
; GISEL-NEXT: v_mul_lo_u32 v17, 0, v5
; GISEL-NEXT: v_mul_hi_u32 v18, v4, v5
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v11
; GISEL-NEXT: v_mul_lo_u32 v9, v3, v6
; GISEL-NEXT: v_mul_lo_u32 v11, v4, v8
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v17, v11
; GISEL-NEXT: v_add_i32_e32 v15, vcc, 1, v1
; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v6, vcc
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v13
; GISEL-NEXT: v_subb_u32_e64 v13, s[4:5], 0, v9, vcc
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13
; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], 1, v5
; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, v8, s[6:7]
; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v14
; GISEL-NEXT: v_subb_u32_e64 v14, s[8:9], 0, v11, s[6:7]
; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v16, s[4:5]
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, v16, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], 1, v15
; GISEL-NEXT: v_addc_u32_e64 v16, s[4:5], 0, v17, s[4:5]
; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], 0, v11
; GISEL-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v11, s[6:7]
; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v4
; GISEL-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v11, s[4:5]
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], 1, v13
; GISEL-NEXT: v_addc_u32_e64 v4, s[6:7], 0, v18, s[6:7]
; GISEL-NEXT: v_sub_i32_e64 v9, s[6:7], 0, v9
; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11
; GISEL-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GISEL-NEXT: v_cndmask_b32_e32 v0, v15, v14, vcc
; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v3
; GISEL-NEXT: v_cndmask_b32_e64 v2, v13, v2, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e32 v3, v17, v16, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v4, v18, v4, s[4:5]
; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v12
; GISEL-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v4, s[4:5]
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i64_24bit:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b32 s6, 0xffffff
; CGP-NEXT: v_and_b32_e32 v0, s6, v0
; CGP-NEXT: v_and_b32_e32 v1, s6, v2
; CGP-NEXT: v_and_b32_e32 v2, s6, v4
; CGP-NEXT: v_and_b32_e32 v3, s6, v6
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v0
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v1, v1
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v3
; CGP-NEXT: v_rcp_f32_e32 v4, v2
; CGP-NEXT: v_rcp_f32_e32 v5, v3
; CGP-NEXT: v_mul_f32_e32 v4, v0, v4
; CGP-NEXT: v_mul_f32_e32 v5, v1, v5
; CGP-NEXT: v_trunc_f32_e32 v4, v4
; CGP-NEXT: v_trunc_f32_e32 v5, v5
; CGP-NEXT: v_mad_f32 v0, -v4, v2, v0
; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT: v_mad_f32 v1, -v5, v3, v1
; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, v3
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0
; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1
; CGP-NEXT: v_and_b32_e32 v0, s6, v0
; CGP-NEXT: v_and_b32_e32 v2, s6, v1
; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: v_mov_b32_e32 v3, 0
; CGP-NEXT: s_setpc_b64 s[30:31]
  ; 16777215 == 0xffffff: only the low 24 bits of each lane survive.
  %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
  %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
  %result = udiv <2 x i64> %num.mask, %den.mask
  ret <2 x i64> %result
}