1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s 3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s 4 5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. v_udiv_i64 below is checked with the shared CHECK prefix only, so both RUN configurations are expected to emit identical code for it. 6 7define i64 @v_udiv_i64(i64 %num, i64 %den) { 8; CHECK-LABEL: v_udiv_i64: 9; CHECK: ; %bb.0: 10; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11; CHECK-NEXT: v_mov_b32_e32 v4, v0 12; CHECK-NEXT: v_mov_b32_e32 v5, v1 13; CHECK-NEXT: v_or_b32_e32 v1, v5, v3 14; CHECK-NEXT: v_mov_b32_e32 v0, 0 15; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 16; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 17; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc 18; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 19; CHECK-NEXT: s_cbranch_execz BB0_2 20; CHECK-NEXT: ; %bb.1: 21; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v2 22; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v3 23; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v2 24; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc 25; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 26; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 27; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 28; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 29; CHECK-NEXT: v_trunc_f32_e32 v1, v1 30; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 31; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 32; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 33; CHECK-NEXT: v_mul_lo_u32 v8, v6, v1 34; CHECK-NEXT: v_mul_lo_u32 v9, v6, v0 35; CHECK-NEXT: v_mul_lo_u32 v10, v7, v0 36; CHECK-NEXT: v_mul_hi_u32 v11, v6, v0 37; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 38; CHECK-NEXT: v_mul_lo_u32 v10, v1, v9 39; CHECK-NEXT: v_mul_hi_u32 v12, v0, v9 40; CHECK-NEXT: v_mul_hi_u32 
v9, v1, v9 41; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 42; CHECK-NEXT: v_mul_lo_u32 v11, v0, v8 43; CHECK-NEXT: v_mul_lo_u32 v13, v1, v8 44; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8 45; CHECK-NEXT: v_mul_hi_u32 v8, v1, v8 46; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 47; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 48; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 49; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 50; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 51; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 52; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 53; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 54; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 55; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 56; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 57; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 58; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 59; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 60; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9 61; CHECK-NEXT: v_addc_u32_e64 v9, s[4:5], v1, v8, vcc 62; CHECK-NEXT: v_add_i32_e64 v1, s[4:5], v1, v8 63; CHECK-NEXT: v_mul_lo_u32 v8, v6, v0 64; CHECK-NEXT: v_mul_lo_u32 v7, v7, v0 65; CHECK-NEXT: v_mul_hi_u32 v10, v6, v0 66; CHECK-NEXT: v_mul_lo_u32 v6, v6, v9 67; CHECK-NEXT: v_mul_lo_u32 v11, v9, v8 68; CHECK-NEXT: v_mul_hi_u32 v12, v0, v8 69; CHECK-NEXT: v_mul_hi_u32 v8, v9, v8 70; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 71; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 72; CHECK-NEXT: v_mul_lo_u32 v7, v0, v6 73; CHECK-NEXT: v_mul_lo_u32 v10, v9, v6 74; CHECK-NEXT: v_mul_hi_u32 v13, v0, v6 75; CHECK-NEXT: v_mul_hi_u32 v6, v9, v6 76; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 77; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 78; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v10, v8 79; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 80; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v12 81; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] 82; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v13 83; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 
84; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v7 85; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v11 86; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 87; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 88; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 89; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 90; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc 91; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 92; CHECK-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 93; CHECK-NEXT: v_mul_lo_u32 v6, v5, v0 94; CHECK-NEXT: v_mul_hi_u32 v7, v4, v0 95; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0 96; CHECK-NEXT: v_mul_lo_u32 v8, v4, v1 97; CHECK-NEXT: v_mul_lo_u32 v9, v5, v1 98; CHECK-NEXT: v_mul_hi_u32 v10, v4, v1 99; CHECK-NEXT: v_mul_hi_u32 v1, v5, v1 100; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 101; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 102; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0 103; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 104; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 105; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 106; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10 107; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 108; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 109; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 110; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6 111; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 112; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 113; CHECK-NEXT: v_mul_lo_u32 v7, v2, v0 114; CHECK-NEXT: v_mul_lo_u32 v8, v3, v0 115; CHECK-NEXT: v_mul_hi_u32 v9, v2, v0 116; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 117; CHECK-NEXT: v_mul_lo_u32 v6, v2, v1 118; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v0 119; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v1, vcc 120; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 121; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10 122; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc 123; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 124; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v7 125; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v5, v6, vcc 126; CHECK-NEXT: v_sub_i32_e64 v5, 
s[4:5], v5, v6 127; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2 128; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 129; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3 130; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 131; CHECK-NEXT: v_subb_u32_e32 v5, vcc, v5, v3, vcc 132; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v3 133; CHECK-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc 134; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v2 135; CHECK-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc 136; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2 137; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 138; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v3 139; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc 140; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v5, v3 141; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 142; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 143; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v8, vcc 144; CHECK-NEXT: v_cndmask_b32_e32 v3, v11, v12, vcc 145; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 146; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 147; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 148; CHECK-NEXT: ; implicit-def: $vgpr2 149; CHECK-NEXT: ; implicit-def: $vgpr4 150; CHECK-NEXT: BB0_2: ; %Flow 151; CHECK-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] 152; CHECK-NEXT: s_xor_b64 exec, exec, s[6:7] 153; CHECK-NEXT: s_cbranch_execz BB0_4 154; CHECK-NEXT: ; %bb.3: 155; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v2 156; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2 157; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 158; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 159; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 160; CHECK-NEXT: v_mul_lo_u32 v1, v1, v0 161; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 162; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 163; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 164; CHECK-NEXT: v_mul_lo_u32 v1, v0, v2 165; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0 166; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1 167; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 168; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 169; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], 
v1, v2 170; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 171; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0 172; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 173; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 174; CHECK-NEXT: v_mov_b32_e32 v1, 0 175; CHECK-NEXT: BB0_4: 176; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] 177; CHECK-NEXT: s_setpc_b64 s[30:31] 178 %result = udiv i64 %num, %den 179 ret i64 %result 180} 181 182; FIXME: This is a workaround for the uniform VGPR case not being handled. 183declare i32 @llvm.amdgcn.readfirstlane(i32) 184 185define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { 186; CHECK-LABEL: s_udiv_i64: 187; CHECK: ; %bb.0: 188; CHECK-NEXT: s_or_b64 s[6:7], s[0:1], s[2:3] 189; CHECK-NEXT: s_mov_b32 s4, 0 190; CHECK-NEXT: s_mov_b32 s5, -1 191; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[4:5] 192; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0 193; CHECK-NEXT: s_cbranch_vccz BB1_2 194; CHECK-NEXT: ; %bb.1: 195; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 196; CHECK-NEXT: v_mov_b32_e32 v1, s3 197; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s3 198; CHECK-NEXT: s_sub_u32 s6, 0, s2 199; CHECK-NEXT: s_cselect_b32 s4, 1, 0 200; CHECK-NEXT: v_mov_b32_e32 v3, s1 201; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v2 202; CHECK-NEXT: s_and_b32 s4, s4, 1 203; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 204; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 205; CHECK-NEXT: s_cmp_lg_u32 s4, 0 206; CHECK-NEXT: s_subb_u32 s7, 0, s3 207; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 208; CHECK-NEXT: v_trunc_f32_e32 v2, v2 209; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 210; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 211; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 212; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 213; CHECK-NEXT: v_mul_lo_u32 v5, s6, v0 214; CHECK-NEXT: v_mul_lo_u32 v6, s7, v0 215; CHECK-NEXT: v_mul_hi_u32 v7, s6, v0 216; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 217; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5 218; CHECK-NEXT: v_mul_hi_u32 v8, v0, v5 219; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 220; CHECK-NEXT: 
v_add_i32_e32 v4, vcc, v4, v7 221; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 222; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4 223; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 224; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 225; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 226; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 227; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 228; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 229; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 230; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 231; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 232; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 233; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 234; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 235; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 236; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 237; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 238; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 239; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 240; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v2, v4, vcc 241; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v2, v4 242; CHECK-NEXT: v_mul_lo_u32 v4, s6, v0 243; CHECK-NEXT: v_mul_lo_u32 v6, s7, v0 244; CHECK-NEXT: v_mul_hi_u32 v7, s6, v0 245; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 246; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 247; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 248; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 249; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 250; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 251; CHECK-NEXT: v_mul_lo_u32 v7, v0, v6 252; CHECK-NEXT: v_mul_lo_u32 v8, v5, v6 253; CHECK-NEXT: v_mul_hi_u32 v11, v0, v6 254; CHECK-NEXT: v_mul_hi_u32 v5, v5, v6 255; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v9, v7 256; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] 257; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 258; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 259; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 260; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 261; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v11 262; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 263; CHECK-NEXT: 
v_add_i32_e64 v6, s[4:5], v7, v6 264; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v9 265; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 266; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 267; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 268; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 269; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc 270; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 271; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc 272; CHECK-NEXT: v_mul_lo_u32 v4, s1, v0 273; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 274; CHECK-NEXT: v_mul_hi_u32 v0, s1, v0 275; CHECK-NEXT: v_mul_lo_u32 v6, s0, v2 276; CHECK-NEXT: v_mul_lo_u32 v7, s1, v2 277; CHECK-NEXT: v_mul_hi_u32 v8, s0, v2 278; CHECK-NEXT: v_mul_hi_u32 v2, s1, v2 279; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 280; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 281; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 282; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 283; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 284; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 285; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 286; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 287; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 288; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 289; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 290; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 291; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 292; CHECK-NEXT: v_mul_lo_u32 v5, s2, v0 293; CHECK-NEXT: v_mul_lo_u32 v6, s3, v0 294; CHECK-NEXT: v_mul_hi_u32 v7, s2, v0 295; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v0 296; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 297; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v8 298; CHECK-NEXT: v_mul_lo_u32 v2, s2, v2 299; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 300; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 301; CHECK-NEXT: v_sub_i32_e32 v5, vcc, s0, v5 302; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v2, vcc 303; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], s1, v2 304; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v5 305; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 
306; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 307; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] 308; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc 309; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3 310; CHECK-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc 311; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v5 312; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 313; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 314; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc 315; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 316; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 317; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v1 318; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc 319; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 320; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v4, vcc 321; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 322; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 323; CHECK-NEXT: s_mov_b32 s5, 0 324; CHECK-NEXT: s_branch BB1_3 325; CHECK-NEXT: BB1_2: 326; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 327; CHECK-NEXT: BB1_3: ; %Flow 328; CHECK-NEXT: s_xor_b32 s1, s5, -1 329; CHECK-NEXT: s_and_b32 s1, s1, 1 330; CHECK-NEXT: s_cmp_lg_u32 s1, 0 331; CHECK-NEXT: s_cbranch_scc1 BB1_5 332; CHECK-NEXT: ; %bb.4: 333; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 334; CHECK-NEXT: s_sub_i32 s1, 0, s2 335; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 336; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 337; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 338; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0 339; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 340; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 341; CHECK-NEXT: v_mul_hi_u32 v0, s0, v0 342; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2 343; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0 344; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s0, v1 345; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 346; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 347; CHECK-NEXT: v_subrev_i32_e64 v2, s[0:1], s2, v1 348; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 349; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0 350; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 351; 
CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 352; CHECK-NEXT: BB1_5: 353; CHECK-NEXT: v_readfirstlane_b32 s0, v0 354; CHECK-NEXT: s_mov_b32 s1, s0 355; CHECK-NEXT: ; return to shader part epilog 356 %result = udiv i64 %num, %den 357 %cast = bitcast i64 %result to <2 x i32> 358 %elt.0 = extractelement <2 x i32> %cast, i32 0 359 %elt.1 = extractelement <2 x i32> %cast, i32 1 360 %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0) 361 %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1) 362 %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0 363 %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1 ; NOTE(review): lane 1 is filled from %res.0, so %res.1 is unused and the checked output only copies the low half (s_mov_b32 s1, s0). If %res.1 was intended, change it and regenerate the assertions with utils/update_llc_test_checks.py - TODO confirm. 364 %cast.back = bitcast <2 x i32> %ins.1 to i64 365 ret i64 %cast.back 366} 367 368define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { 369; GISEL-LABEL: v_udiv_v2i64: 370; GISEL: ; %bb.0: 371; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 372; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 373; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5 374; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 375; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 376; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 377; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 378; GISEL-NEXT: v_trunc_f32_e32 v9, v9 379; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9 380; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 381; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 382; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 383; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc 384; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 385; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8 386; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 387; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 388; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 389; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 390; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 391; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13 392; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12 393; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 394; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 395; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 396; 
GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 397; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 398; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 399; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 400; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 401; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 402; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 403; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 404; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 405; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 406; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 407; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 408; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 409; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 410; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 411; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 412; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v13, vcc 413; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 414; GISEL-NEXT: v_mul_lo_u32 v13, v10, v8 415; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8 416; GISEL-NEXT: v_mul_lo_u32 v14, v10, v12 417; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8 418; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 419; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 420; GISEL-NEXT: v_mul_lo_u32 v11, v12, v13 421; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 422; GISEL-NEXT: v_mul_hi_u32 v15, v8, v13 423; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 424; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 425; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 426; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 427; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 428; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 429; GISEL-NEXT: v_mul_hi_u32 v13, v12, v13 430; GISEL-NEXT: v_mul_hi_u32 v15, v8, v10 431; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 432; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 433; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 434; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 435; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 436; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 
437; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 438; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 439; GISEL-NEXT: v_mul_hi_u32 v10, v12, v10 440; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 441; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 442; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc 443; GISEL-NEXT: v_addc_u32_e64 v9, vcc, 0, v9, s[4:5] 444; GISEL-NEXT: v_mul_lo_u32 v10, v1, v8 445; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9 446; GISEL-NEXT: v_mul_hi_u32 v12, v0, v8 447; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 448; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 449; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 450; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 451; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 452; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 453; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 454; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 455; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 456; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 457; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 458; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 459; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 460; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 461; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 462; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 463; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 464; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 465; GISEL-NEXT: v_mul_lo_u32 v10, v4, v8 466; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8 467; GISEL-NEXT: v_mul_lo_u32 v12, v4, v9 468; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 469; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 470; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 471; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 472; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v11, vcc 473; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v11 474; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 475; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] 476; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 477; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] 478; GISEL-NEXT: 
v_cmp_eq_u32_e64 s[4:5], v10, v5 479; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, v12, s[4:5] 480; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v4 481; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc 482; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] 483; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v8 484; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc 485; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 486; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc 487; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 488; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 489; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5 490; GISEL-NEXT: v_cndmask_b32_e32 v0, v13, v0, vcc 491; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v11 492; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v12, vcc 493; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 494; GISEL-NEXT: v_cndmask_b32_e32 v0, v11, v1, vcc 495; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc 496; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 497; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc 498; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc 499; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v6 500; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v7 501; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 502; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 503; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 504; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 505; GISEL-NEXT: v_trunc_f32_e32 v5, v5 506; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 507; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 508; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 509; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 510; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc 511; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 512; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 513; GISEL-NEXT: v_mul_lo_u32 v12, v8, v5 514; GISEL-NEXT: v_mul_hi_u32 v13, v8, v4 515; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 516; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 517; GISEL-NEXT: v_mul_lo_u32 v12, v5, v10 518; GISEL-NEXT: v_mul_lo_u32 v13, v4, v11 519; GISEL-NEXT: v_mul_hi_u32 v14, v4, v10 520; 
GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 521; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 522; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 523; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 524; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 525; GISEL-NEXT: v_mul_lo_u32 v13, v5, v11 526; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10 527; GISEL-NEXT: v_mul_hi_u32 v14, v4, v11 528; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 529; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 530; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 531; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 532; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 533; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 534; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 535; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 536; GISEL-NEXT: v_mul_hi_u32 v11, v5, v11 537; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 538; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 539; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v5, v11, vcc 540; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v11 541; GISEL-NEXT: v_mul_lo_u32 v11, v8, v4 542; GISEL-NEXT: v_mul_lo_u32 v9, v9, v4 543; GISEL-NEXT: v_mul_lo_u32 v12, v8, v10 544; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 545; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 546; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 547; GISEL-NEXT: v_mul_lo_u32 v9, v10, v11 548; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 549; GISEL-NEXT: v_mul_hi_u32 v13, v4, v11 550; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 551; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 552; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 553; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 554; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 555; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 556; GISEL-NEXT: v_mul_hi_u32 v11, v10, v11 557; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 558; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 559; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 560; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 561; GISEL-NEXT: 
v_cndmask_b32_e64 v13, 0, 1, s[4:5] 562; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13 563; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 564; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 565; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 566; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 567; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 568; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v9 569; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc 570; GISEL-NEXT: v_addc_u32_e64 v5, vcc, 0, v5, s[4:5] 571; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 572; GISEL-NEXT: v_mul_lo_u32 v9, v2, v5 573; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 574; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 575; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 576; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 577; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 578; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 579; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5 580; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 581; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 582; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4 583; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 584; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 585; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 586; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 587; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 588; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 589; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 590; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 591; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 592; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4 593; GISEL-NEXT: v_mul_lo_u32 v9, v7, v4 594; GISEL-NEXT: v_mul_lo_u32 v10, v6, v5 595; GISEL-NEXT: v_mul_hi_u32 v11, v6, v4 596; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 597; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 598; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 599; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v9, vcc 600; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9 601; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v7 602; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 603; GISEL-NEXT: 
v_cmp_ge_u32_e64 s[4:5], v2, v6 604; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 605; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v7 606; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v10, s[4:5] 607; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v6 608; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc 609; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] 610; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 611; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v5, vcc 612; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v7 613; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 614; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 615; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 616; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 617; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc 618; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v9 619; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v10, vcc 620; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 621; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc 622; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v6, vcc 623; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 624; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 625; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc 626; GISEL-NEXT: s_setpc_b64 s[30:31] 627; 628; CGP-LABEL: v_udiv_v2i64: 629; CGP: ; %bb.0: 630; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 631; CGP-NEXT: v_mov_b32_e32 v10, v0 632; CGP-NEXT: v_mov_b32_e32 v11, v1 633; CGP-NEXT: v_mov_b32_e32 v8, v2 634; CGP-NEXT: v_mov_b32_e32 v9, v3 635; CGP-NEXT: v_or_b32_e32 v1, v11, v5 636; CGP-NEXT: v_mov_b32_e32 v0, 0 637; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 638; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 639; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 640; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 641; CGP-NEXT: s_cbranch_execz BB2_2 642; CGP-NEXT: ; %bb.1: 643; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 644; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5 645; CGP-NEXT: v_sub_i32_e32 v2, vcc, 0, v4 646; CGP-NEXT: v_subb_u32_e32 v3, vcc, 0, v5, vcc 647; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 648; CGP-NEXT: 
v_rcp_iflag_f32_e32 v0, v0 649; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 650; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 651; CGP-NEXT: v_trunc_f32_e32 v1, v1 652; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 653; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1 654; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 655; CGP-NEXT: v_mul_lo_u32 v12, v2, v1 656; CGP-NEXT: v_mul_lo_u32 v13, v2, v0 657; CGP-NEXT: v_mul_lo_u32 v14, v3, v0 658; CGP-NEXT: v_mul_hi_u32 v15, v2, v0 659; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 660; CGP-NEXT: v_mul_lo_u32 v14, v1, v13 661; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 662; CGP-NEXT: v_mul_hi_u32 v13, v1, v13 663; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 664; CGP-NEXT: v_mul_lo_u32 v15, v0, v12 665; CGP-NEXT: v_mul_lo_u32 v17, v1, v12 666; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 667; CGP-NEXT: v_mul_hi_u32 v12, v1, v12 668; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 669; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 670; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 671; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 672; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 673; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 674; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 675; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 676; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 677; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 678; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 679; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 680; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 681; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 682; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 683; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], v1, v12, vcc 684; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v1, v12 685; CGP-NEXT: v_mul_lo_u32 v12, v2, v0 686; CGP-NEXT: v_mul_lo_u32 v3, v3, v0 687; CGP-NEXT: v_mul_hi_u32 v14, v2, v0 688; CGP-NEXT: v_mul_lo_u32 v2, v2, v13 689; CGP-NEXT: v_mul_lo_u32 v15, v13, v12 690; CGP-NEXT: v_mul_hi_u32 v16, v0, v12 691; CGP-NEXT: v_mul_hi_u32 v12, v13, v12 692; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v3, v2 693; 
CGP-NEXT: v_add_i32_e64 v2, s[4:5], v2, v14 694; CGP-NEXT: v_mul_lo_u32 v3, v0, v2 695; CGP-NEXT: v_mul_lo_u32 v14, v13, v2 696; CGP-NEXT: v_mul_hi_u32 v17, v0, v2 697; CGP-NEXT: v_mul_hi_u32 v2, v13, v2 698; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v15, v3 699; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 700; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 701; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 702; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v3, v16 703; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] 704; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 705; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 706; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v13, v3 707; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v15 708; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v12, v3 709; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 710; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 711; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v2, v12 712; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc 713; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v3 714; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 715; CGP-NEXT: v_mul_lo_u32 v2, v11, v0 716; CGP-NEXT: v_mul_hi_u32 v3, v10, v0 717; CGP-NEXT: v_mul_hi_u32 v0, v11, v0 718; CGP-NEXT: v_mul_lo_u32 v12, v10, v1 719; CGP-NEXT: v_mul_lo_u32 v13, v11, v1 720; CGP-NEXT: v_mul_hi_u32 v14, v10, v1 721; CGP-NEXT: v_mul_hi_u32 v1, v11, v1 722; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 723; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 724; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 725; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 726; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 727; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 728; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 729; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 730; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2 731; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v3 732; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 733; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 734; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 735; CGP-NEXT: v_mul_lo_u32 v3, v4, v0 736; CGP-NEXT: 
v_mul_lo_u32 v12, v5, v0 737; CGP-NEXT: v_mul_hi_u32 v13, v4, v0 738; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 739; CGP-NEXT: v_mul_lo_u32 v2, v4, v1 740; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v0 741; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v1, vcc 742; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2 743; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v14 744; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc 745; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13 746; CGP-NEXT: v_sub_i32_e32 v3, vcc, v10, v3 747; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v11, v2, vcc 748; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v11, v2 749; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v4 750; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] 751; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 752; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] 753; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v5, vcc 754; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v5 755; CGP-NEXT: v_cndmask_b32_e32 v10, v13, v11, vcc 756; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v4 757; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc 758; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4 759; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc 760; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 761; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc 762; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v5 763; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc 764; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 765; CGP-NEXT: v_cndmask_b32_e32 v2, v14, v12, vcc 766; CGP-NEXT: v_cndmask_b32_e32 v3, v15, v16, vcc 767; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 768; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 769; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 770; CGP-NEXT: ; implicit-def: $vgpr4 771; CGP-NEXT: ; implicit-def: $vgpr10 772; CGP-NEXT: BB2_2: ; %Flow2 773; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] 774; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] 775; CGP-NEXT: s_cbranch_execz BB2_4 776; CGP-NEXT: ; %bb.3: 777; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 778; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4 779; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 780; CGP-NEXT: 
v_mul_f32_e32 v0, 0x4f7ffffe, v0 781; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 782; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 783; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 784; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 785; CGP-NEXT: v_mul_hi_u32 v0, v10, v0 786; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 787; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 788; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v1 789; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 790; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 791; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v4 792; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 793; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 794; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 795; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 796; CGP-NEXT: v_mov_b32_e32 v1, 0 797; CGP-NEXT: BB2_4: 798; CGP-NEXT: s_or_b64 exec, exec, s[6:7] 799; CGP-NEXT: v_or_b32_e32 v3, v9, v7 800; CGP-NEXT: v_mov_b32_e32 v2, 0 801; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 802; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 803; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 804; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 805; CGP-NEXT: s_cbranch_execz BB2_6 806; CGP-NEXT: ; %bb.5: 807; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6 808; CGP-NEXT: v_cvt_f32_u32_e32 v3, v7 809; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v6 810; CGP-NEXT: v_subb_u32_e32 v5, vcc, 0, v7, vcc 811; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 812; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 813; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 814; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 815; CGP-NEXT: v_trunc_f32_e32 v3, v3 816; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 817; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 818; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 819; CGP-NEXT: v_mul_lo_u32 v10, v4, v3 820; CGP-NEXT: v_mul_lo_u32 v11, v4, v2 821; CGP-NEXT: v_mul_lo_u32 v12, v5, v2 822; CGP-NEXT: v_mul_hi_u32 v13, v4, v2 823; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 824; CGP-NEXT: v_mul_lo_u32 v12, v3, v11 825; CGP-NEXT: v_mul_hi_u32 v14, v2, v11 826; CGP-NEXT: v_mul_hi_u32 v11, v3, v11 827; CGP-NEXT: v_add_i32_e32 
v10, vcc, v10, v13 828; CGP-NEXT: v_mul_lo_u32 v13, v2, v10 829; CGP-NEXT: v_mul_lo_u32 v15, v3, v10 830; CGP-NEXT: v_mul_hi_u32 v16, v2, v10 831; CGP-NEXT: v_mul_hi_u32 v10, v3, v10 832; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 833; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 834; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 835; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 836; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 837; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 838; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 839; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 840; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 841; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 842; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 843; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 844; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 845; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 846; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 847; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v3, v10, vcc 848; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v3, v10 849; CGP-NEXT: v_mul_lo_u32 v10, v4, v2 850; CGP-NEXT: v_mul_lo_u32 v5, v5, v2 851; CGP-NEXT: v_mul_hi_u32 v12, v4, v2 852; CGP-NEXT: v_mul_lo_u32 v4, v4, v11 853; CGP-NEXT: v_mul_lo_u32 v13, v11, v10 854; CGP-NEXT: v_mul_hi_u32 v14, v2, v10 855; CGP-NEXT: v_mul_hi_u32 v10, v11, v10 856; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v5, v4 857; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v12 858; CGP-NEXT: v_mul_lo_u32 v5, v2, v4 859; CGP-NEXT: v_mul_lo_u32 v12, v11, v4 860; CGP-NEXT: v_mul_hi_u32 v15, v2, v4 861; CGP-NEXT: v_mul_hi_u32 v4, v11, v4 862; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v13, v5 863; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 864; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 865; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 866; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v14 867; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] 868; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 869; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 870; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v11, v5 
871; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v13 872; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v10, v5 873; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 874; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 875; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v10 876; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc 877; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5 878; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 879; CGP-NEXT: v_mul_lo_u32 v4, v9, v2 880; CGP-NEXT: v_mul_hi_u32 v5, v8, v2 881; CGP-NEXT: v_mul_hi_u32 v2, v9, v2 882; CGP-NEXT: v_mul_lo_u32 v10, v8, v3 883; CGP-NEXT: v_mul_lo_u32 v11, v9, v3 884; CGP-NEXT: v_mul_hi_u32 v12, v8, v3 885; CGP-NEXT: v_mul_hi_u32 v3, v9, v3 886; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 887; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 888; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2 889; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 890; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 891; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 892; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 893; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 894; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 895; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5 896; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 897; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 898; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 899; CGP-NEXT: v_mul_lo_u32 v5, v6, v2 900; CGP-NEXT: v_mul_lo_u32 v10, v7, v2 901; CGP-NEXT: v_mul_hi_u32 v11, v6, v2 902; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 903; CGP-NEXT: v_mul_lo_u32 v4, v6, v3 904; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v2 905; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc 906; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 907; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v12 908; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc 909; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 910; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 911; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v9, v4, vcc 912; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v9, v4 913; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v6 914; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 
-1, s[4:5] 915; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v7 916; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] 917; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v7, vcc 918; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v7 919; CGP-NEXT: v_cndmask_b32_e32 v8, v11, v9, vcc 920; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v6 921; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc 922; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v6 923; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 924; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v7 925; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc 926; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 927; CGP-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc 928; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 929; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v10, vcc 930; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc 931; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 932; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 933; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 934; CGP-NEXT: ; implicit-def: $vgpr6 935; CGP-NEXT: ; implicit-def: $vgpr8 936; CGP-NEXT: BB2_6: ; %Flow 937; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] 938; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] 939; CGP-NEXT: s_cbranch_execz BB2_8 940; CGP-NEXT: ; %bb.7: 941; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6 942; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6 943; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 944; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 945; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 946; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 947; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 948; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 949; CGP-NEXT: v_mul_hi_u32 v2, v8, v2 950; CGP-NEXT: v_mul_lo_u32 v3, v2, v6 951; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 952; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v3 953; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 954; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 955; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v6 956; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 957; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 958; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 959; CGP-NEXT: v_cndmask_b32_e32 v2, v2, 
v4, vcc 960; CGP-NEXT: v_mov_b32_e32 v3, 0 961; CGP-NEXT: BB2_8: 962; CGP-NEXT: s_or_b64 exec, exec, s[6:7] 963; CGP-NEXT: s_setpc_b64 s[30:31] 964 %result = udiv <2 x i64> %num, %den 965 ret <2 x i64> %result 966} 967

; Unsigned i64 division by the constant 4096. Both llc invocations share the
; same expectations here: the division is emitted as a single 64-bit logical
; shift right by 12.
; The assertion lines in this file are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @v_udiv_i64_pow2k_denom(i64 %num) {
; CHECK-LABEL: v_udiv_i64_pow2k_denom:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshr_b64 v[0:1], v[0:1], 12
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %result = udiv i64 %num, 4096
  ret i64 %result
}

; Two-element vector form of the divide-by-4096 case. The expectations differ
; per invocation: the GISEL checks expect the shift amount to be built in s4
; with two s_sub_u32 instructions (63 - 11, then 64 - s4) before both elements
; are shifted by s4, while the CGP checks expect both elements to be shifted by
; the immediate 12.
define <2 x i64> @v_udiv_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-LABEL: v_udiv_v2i64_pow2k_denom:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_sub_u32 s4, 63, 11
; GISEL-NEXT:    s_sub_u32 s4, 64, s4
; GISEL-NEXT:    v_lshr_b64 v[0:1], v[0:1], s4
; GISEL-NEXT:    v_lshr_b64 v[2:3], v[2:3], s4
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i64_pow2k_denom:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    v_lshr_b64 v[0:1], v[0:1], 12
; CGP-NEXT:    v_lshr_b64 v[2:3], v[2:3], 12
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = udiv <2 x i64> %num, <i64 4096, i64 4096>
  ret <2 x i64> %result
}

; Unsigned i64 division by the constant 1235195, which is not a power of two.
; The expected code contains no reciprocal/divide sequence: it forms 32-bit
; mul_lo/mul_hi partial products of the numerator halves (v0, v1) with the
; constants loaded into s4 and s5, propagates the carries through vcc, and
; finishes with a 64-bit right shift by 20.
define i64 @v_udiv_i64_oddk_denom(i64 %num) {
; CHECK-LABEL: v_udiv_i64_oddk_denom:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s4, 0x1fb03c31
; CHECK-NEXT:    s_mov_b32 s5, 0xd9528440
; CHECK-NEXT:    v_mul_lo_u32 v2, v1, s4
; CHECK-NEXT:    v_mul_lo_u32 v3, v0, s5
; CHECK-NEXT:    v_mul_hi_u32 v4, v0, s4
; CHECK-NEXT:    v_mul_lo_u32 v5, v1, s5
; CHECK-NEXT:    v_mul_hi_u32 v6, v1, s4
; CHECK-NEXT:    v_mul_hi_u32 v0, v0, s5
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v4
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT:    v_mul_hi_u32 v1, v1, s5
; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
; CHECK-NEXT:    v_lshr_b64 v[0:1], v[0:1], 20
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %result = udiv i64 %num, 1235195
  ret i64 %result
}

; Two-element vector form of the divide-by-1235195 case. The expected code
; applies the same multiply / carry / shift-by-20 sequence to each element
; (v[0:1] and v[2:3]), sharing the constants in s4 and s5, with the
; instructions of the two elements interleaved.
define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) {
; CHECK-LABEL: v_udiv_v2i64_oddk_denom:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s4, 0x1fb03c31
; CHECK-NEXT:    s_mov_b32 s5, 0xd9528440
; CHECK-NEXT:    v_mul_lo_u32 v4, v1, s4
; CHECK-NEXT:    v_mul_lo_u32 v5, v0, s5
; CHECK-NEXT:    v_mul_hi_u32 v6, v0, s4
; CHECK-NEXT:    v_mul_lo_u32 v7, v1, s5
; CHECK-NEXT:    v_mul_hi_u32 v8, v1, s4
; CHECK-NEXT:    v_mul_hi_u32 v0, v0, s5
; CHECK-NEXT:    v_mul_hi_u32 v1, v1, s5
; CHECK-NEXT:    v_mul_lo_u32 v9, v3, s4
; CHECK-NEXT:    v_mul_lo_u32 v10, v2, s5
; CHECK-NEXT:    v_mul_hi_u32 v11, v2, s4
; CHECK-NEXT:    v_mul_lo_u32 v12, v3, s5
; CHECK-NEXT:    v_mul_hi_u32 v13, v3, s4
; CHECK-NEXT:    v_mul_hi_u32 v2, v2, s5
; CHECK-NEXT:    v_mul_hi_u32 v3, v3, s5
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v11
; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v6
; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v10, v7
; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v13, v9
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v6
; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
; CHECK-NEXT:    v_lshr_b64 v[0:1], v[0:1], 20
; CHECK-NEXT:    v_lshr_b64 v[2:3], v[2:3], 20
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %result = udiv <2 x i64> %num, <i64 1235195, i64 1235195>
  ret <2 x i64> %result
}

; Unsigned i64 division whose denominator is computed at run time as
; 4096 << %y, so it is not a compile-time constant. The expected code ORs the
; high dwords of the numerator (v4) and the shifted denominator (v6) and
; compares the result with zero. Lanes where it is non-zero execute the full
; 64-bit expansion in %bb.1; the remaining lanes execute the 32-bit expansion
; in %bb.3, which works on the low dwords (v3, v5) and writes 0 to the high
; dword of the result.
define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
; CHECK-LABEL: v_udiv_i64_pow2_shl_denom:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v3, v0
; CHECK-NEXT:    v_mov_b32_e32 v4, v1
; CHECK-NEXT:    s_mov_b64 s[4:5], 0x1000
; CHECK-NEXT:    v_lshl_b64 v[5:6], s[4:5], v2
; CHECK-NEXT:    v_or_b32_e32 v1, v4, v6
; CHECK-NEXT:    v_mov_b32_e32 v0, 0
; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execz BB7_2
; CHECK-NEXT:  ; %bb.1:
; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v5
; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v6
; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, 0, v5
; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v6, vcc
; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
; CHECK-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
; CHECK-NEXT:    v_trunc_f32_e32 v1, v1
; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
; CHECK-NEXT:    v_mul_lo_u32 v8, v2, v1
; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v0
; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v0
; CHECK-NEXT:    v_mul_hi_u32 v11, v2, v0
; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
; CHECK-NEXT:    v_mul_lo_u32 v10, v1, v9
; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v9
; CHECK-NEXT:    v_mul_hi_u32 v9, v1, v9
; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
; CHECK-NEXT:    v_mul_lo_u32 v11, v0, v8
; CHECK-NEXT:    v_mul_lo_u32 v13, v1, v8
; CHECK-NEXT:    v_mul_hi_u32 v14, v0, v8
; CHECK-NEXT:    v_mul_hi_u32 v8, v1, v8
; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v1, v8, vcc
; CHECK-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v8
; CHECK-NEXT:    v_mul_lo_u32 v8, v2, v0
; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v0
; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v0
; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v9
; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v8
; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v7, v2
; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v10
; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v2
; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v2
; CHECK-NEXT:    v_mul_hi_u32 v13, v0, v2
; CHECK-NEXT:    v_mul_hi_u32 v2, v9, v2
; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v8
; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT:    v_mul_lo_u32 v2, v4, v0
; CHECK-NEXT:    v_mul_hi_u32 v7, v3, v0
; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v1
; CHECK-NEXT:    v_mul_lo_u32 v9, v4, v1
; CHECK-NEXT:    v_mul_hi_u32 v10, v3, v1
; CHECK-NEXT:    v_mul_hi_u32 v1, v4, v1
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
; CHECK-NEXT:    v_mul_lo_u32 v7, v5, v0
; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v0
; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v0
; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
; CHECK-NEXT:    v_mul_lo_u32 v2, v5, v1
; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v0
; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v1, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v3, v7
; CHECK-NEXT:    v_subb_u32_e64 v7, s[4:5], v4, v2, vcc
; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], v4, v2
; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v5
; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v6
; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
; CHECK-NEXT:    v_subb_u32_e32 v2, vcc, v2, v6, vcc
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v6
; CHECK-NEXT:    v_cndmask_b32_e32 v4, v9, v4, vcc
; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
; CHECK-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v5
; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v6
; CHECK-NEXT:    v_cndmask_b32_e32 v2, v5, v3, vcc
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v2, v10, v8, vcc
; CHECK-NEXT:    v_cndmask_b32_e32 v3, v11, v12, vcc
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; CHECK-NEXT:    ; implicit-def: $vgpr5_vgpr6
; CHECK-NEXT:    ; implicit-def: $vgpr3
; CHECK-NEXT:  BB7_2: ; %Flow
; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
; CHECK-NEXT:    s_cbranch_execz BB7_4
; CHECK-NEXT:  ; %bb.3:
; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v5
; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, 0, v5
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v0
; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT:    v_mul_hi_u32 v0, v3, v0
; CHECK-NEXT:    v_mul_lo_u32 v1, v0, v5
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], v1, v5
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:  BB7_4:
; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %shl.y = shl i64 4096, %y
  %r = udiv i64 %x, %shl.y
  ret i64 %r
}
1263 1264define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { 1265; GISEL-LABEL: v_udiv_v2i64_pow2_shl_denom: 1266; GISEL: ; %bb.0: 1267; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1268; GISEL-NEXT: s_mov_b64 s[4:5], 0x1000 1269; GISEL-NEXT: v_lshl_b64 v[7:8], s[4:5], v4 1270; GISEL-NEXT: v_lshl_b64 v[4:5], s[4:5], v6 1271; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v7 1272; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v8 1273; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v9 1274; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 1275; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 1276; GISEL-NEXT:
v_mul_f32_e32 v9, 0x2f800000, v6 1277; GISEL-NEXT: v_trunc_f32_e32 v9, v9 1278; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v9 1279; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 1280; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 1281; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v7 1282; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v8, vcc 1283; GISEL-NEXT: v_mul_lo_u32 v12, v10, v6 1284; GISEL-NEXT: v_mul_lo_u32 v13, v11, v6 1285; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 1286; GISEL-NEXT: v_mul_hi_u32 v15, v10, v6 1287; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1288; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 1289; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 1290; GISEL-NEXT: v_mul_lo_u32 v15, v6, v13 1291; GISEL-NEXT: v_mul_hi_u32 v16, v6, v12 1292; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 1293; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1294; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 1295; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1296; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 1297; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 1298; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 1299; GISEL-NEXT: v_mul_hi_u32 v16, v6, v13 1300; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 1301; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1302; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 1303; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1304; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 1305; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 1306; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1307; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 1308; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 1309; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1310; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12 1311; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v13, vcc 1312; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 1313; GISEL-NEXT: v_mul_lo_u32 v13, v10, v6 1314; GISEL-NEXT: v_mul_lo_u32 v11, v11, v6 1315; GISEL-NEXT: v_mul_lo_u32 v14, v10, v12 1316; GISEL-NEXT: v_mul_hi_u32 v10, v10, v6 1317; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, 
v14 1318; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 1319; GISEL-NEXT: v_mul_lo_u32 v11, v12, v13 1320; GISEL-NEXT: v_mul_lo_u32 v14, v6, v10 1321; GISEL-NEXT: v_mul_hi_u32 v15, v6, v13 1322; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 1323; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 1324; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 1325; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 1326; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 1327; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 1328; GISEL-NEXT: v_mul_hi_u32 v13, v12, v13 1329; GISEL-NEXT: v_mul_hi_u32 v15, v6, v10 1330; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 1331; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 1332; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 1333; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 1334; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 1335; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 1336; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 1337; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 1338; GISEL-NEXT: v_mul_hi_u32 v10, v12, v10 1339; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 1340; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v11 1341; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc 1342; GISEL-NEXT: v_addc_u32_e64 v9, vcc, 0, v9, s[4:5] 1343; GISEL-NEXT: v_mul_lo_u32 v10, v1, v6 1344; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9 1345; GISEL-NEXT: v_mul_hi_u32 v12, v0, v6 1346; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 1347; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1348; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 1349; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1350; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1351; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 1352; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 1353; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 1354; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6 1355; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1356; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12 1357; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, 
vcc 1358; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1359; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 1360; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1361; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1362; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 1363; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 1364; GISEL-NEXT: v_mul_lo_u32 v10, v7, v6 1365; GISEL-NEXT: v_mul_lo_u32 v11, v8, v6 1366; GISEL-NEXT: v_mul_lo_u32 v12, v7, v9 1367; GISEL-NEXT: v_mul_hi_u32 v13, v7, v6 1368; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1369; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 1370; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 1371; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v11, vcc 1372; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v11 1373; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v8 1374; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] 1375; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 1376; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] 1377; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v8 1378; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, v12, s[4:5] 1379; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v7 1380; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc 1381; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] 1382; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v6 1383; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc 1384; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v8 1385; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc 1386; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 1387; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 1388; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v8 1389; GISEL-NEXT: v_cndmask_b32_e32 v0, v13, v0, vcc 1390; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v11 1391; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v12, vcc 1392; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 1393; GISEL-NEXT: v_cndmask_b32_e32 v0, v11, v1, vcc 1394; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc 1395; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 1396; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc 1397; GISEL-NEXT: 
v_cndmask_b32_e32 v1, v9, v1, vcc 1398; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v4 1399; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v5 1400; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7 1401; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 1402; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 1403; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6 1404; GISEL-NEXT: v_trunc_f32_e32 v7, v7 1405; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v7 1406; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 1407; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 1408; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v4 1409; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v5, vcc 1410; GISEL-NEXT: v_mul_lo_u32 v10, v8, v6 1411; GISEL-NEXT: v_mul_lo_u32 v11, v9, v6 1412; GISEL-NEXT: v_mul_lo_u32 v12, v8, v7 1413; GISEL-NEXT: v_mul_hi_u32 v13, v8, v6 1414; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1415; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 1416; GISEL-NEXT: v_mul_lo_u32 v12, v7, v10 1417; GISEL-NEXT: v_mul_lo_u32 v13, v6, v11 1418; GISEL-NEXT: v_mul_hi_u32 v14, v6, v10 1419; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 1420; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1421; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 1422; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1423; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 1424; GISEL-NEXT: v_mul_lo_u32 v13, v7, v11 1425; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10 1426; GISEL-NEXT: v_mul_hi_u32 v14, v6, v11 1427; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 1428; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1429; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 1430; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1431; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1432; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 1433; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1434; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 1435; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11 1436; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1437; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 1438; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v7, v11, vcc 
1439; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v7, v11 1440; GISEL-NEXT: v_mul_lo_u32 v11, v8, v6 1441; GISEL-NEXT: v_mul_lo_u32 v9, v9, v6 1442; GISEL-NEXT: v_mul_lo_u32 v12, v8, v10 1443; GISEL-NEXT: v_mul_hi_u32 v8, v8, v6 1444; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 1445; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 1446; GISEL-NEXT: v_mul_lo_u32 v9, v10, v11 1447; GISEL-NEXT: v_mul_lo_u32 v12, v6, v8 1448; GISEL-NEXT: v_mul_hi_u32 v13, v6, v11 1449; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 1450; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 1451; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 1452; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 1453; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 1454; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 1455; GISEL-NEXT: v_mul_hi_u32 v11, v10, v11 1456; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8 1457; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 1458; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 1459; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 1460; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 1461; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13 1462; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 1463; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 1464; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 1465; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 1466; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 1467; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v9 1468; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc 1469; GISEL-NEXT: v_addc_u32_e64 v7, vcc, 0, v7, s[4:5] 1470; GISEL-NEXT: v_mul_lo_u32 v8, v3, v6 1471; GISEL-NEXT: v_mul_lo_u32 v9, v2, v7 1472; GISEL-NEXT: v_mul_hi_u32 v10, v2, v6 1473; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 1474; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1475; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 1476; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1477; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 1478; GISEL-NEXT: v_mul_lo_u32 v9, v3, v7 1479; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 1480; 
GISEL-NEXT: v_mul_hi_u32 v10, v2, v7 1481; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 1482; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1483; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 1484; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1485; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 1486; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 1487; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1488; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 1489; GISEL-NEXT: v_mul_hi_u32 v7, v3, v7 1490; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 1491; GISEL-NEXT: v_mul_lo_u32 v8, v4, v6 1492; GISEL-NEXT: v_mul_lo_u32 v9, v5, v6 1493; GISEL-NEXT: v_mul_lo_u32 v10, v4, v7 1494; GISEL-NEXT: v_mul_hi_u32 v11, v4, v6 1495; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 1496; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 1497; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 1498; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v9, vcc 1499; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9 1500; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5 1501; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 1502; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 1503; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 1504; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v5 1505; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v10, s[4:5] 1506; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v4 1507; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc 1508; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] 1509; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v6 1510; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v7, vcc 1511; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5 1512; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 1513; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 1514; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 1515; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 1516; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc 1517; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v9 1518; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v10, vcc 1519; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 1520; GISEL-NEXT: 
v_cndmask_b32_e32 v2, v9, v3, vcc 1521; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v4, vcc 1522; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 1523; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc 1524; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc 1525; GISEL-NEXT: s_setpc_b64 s[30:31] 1526; 1527; CGP-LABEL: v_udiv_v2i64_pow2_shl_denom: 1528; CGP: ; %bb.0: 1529; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1530; CGP-NEXT: v_mov_b32_e32 v8, v0 1531; CGP-NEXT: v_mov_b32_e32 v9, v1 1532; CGP-NEXT: v_mov_b32_e32 v5, v2 1533; CGP-NEXT: v_mov_b32_e32 v7, v3 1534; CGP-NEXT: s_mov_b64 s[4:5], 0x1000 1535; CGP-NEXT: v_lshl_b64 v[2:3], s[4:5], v4 1536; CGP-NEXT: v_lshl_b64 v[10:11], s[4:5], v6 1537; CGP-NEXT: v_or_b32_e32 v1, v9, v3 1538; CGP-NEXT: v_mov_b32_e32 v0, 0 1539; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 1540; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 1541; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 1542; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 1543; CGP-NEXT: s_cbranch_execz BB8_2 1544; CGP-NEXT: ; %bb.1: 1545; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2 1546; CGP-NEXT: v_cvt_f32_u32_e32 v1, v3 1547; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v2 1548; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v3, vcc 1549; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 1550; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 1551; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 1552; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 1553; CGP-NEXT: v_trunc_f32_e32 v1, v1 1554; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 1555; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1 1556; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 1557; CGP-NEXT: v_mul_lo_u32 v12, v4, v1 1558; CGP-NEXT: v_mul_lo_u32 v13, v4, v0 1559; CGP-NEXT: v_mul_lo_u32 v14, v6, v0 1560; CGP-NEXT: v_mul_hi_u32 v15, v4, v0 1561; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 1562; CGP-NEXT: v_mul_lo_u32 v14, v1, v13 1563; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 1564; CGP-NEXT: v_mul_hi_u32 v13, v1, v13 1565; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 1566; CGP-NEXT: v_mul_lo_u32 v15, v0, v12 1567; CGP-NEXT: 
v_mul_lo_u32 v17, v1, v12 1568; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 1569; CGP-NEXT: v_mul_hi_u32 v12, v1, v12 1570; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 1571; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1572; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 1573; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 1574; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 1575; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1576; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 1577; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1578; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 1579; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 1580; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1581; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1582; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 1583; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 1584; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 1585; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], v1, v12, vcc 1586; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v1, v12 1587; CGP-NEXT: v_mul_lo_u32 v12, v4, v0 1588; CGP-NEXT: v_mul_lo_u32 v6, v6, v0 1589; CGP-NEXT: v_mul_hi_u32 v14, v4, v0 1590; CGP-NEXT: v_mul_lo_u32 v4, v4, v13 1591; CGP-NEXT: v_mul_lo_u32 v15, v13, v12 1592; CGP-NEXT: v_mul_hi_u32 v16, v0, v12 1593; CGP-NEXT: v_mul_hi_u32 v12, v13, v12 1594; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v6, v4 1595; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v14 1596; CGP-NEXT: v_mul_lo_u32 v6, v0, v4 1597; CGP-NEXT: v_mul_lo_u32 v14, v13, v4 1598; CGP-NEXT: v_mul_hi_u32 v17, v0, v4 1599; CGP-NEXT: v_mul_hi_u32 v4, v13, v4 1600; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v15, v6 1601; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 1602; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 1603; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 1604; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v16 1605; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 1606; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 1607; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 1608; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v13, v6 1609; CGP-NEXT: v_add_i32_e64 v13, 
s[4:5], v14, v15 1610; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v12, v6 1611; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 1612; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 1613; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v12 1614; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc 1615; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 1616; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1617; CGP-NEXT: v_mul_lo_u32 v4, v9, v0 1618; CGP-NEXT: v_mul_hi_u32 v6, v8, v0 1619; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 1620; CGP-NEXT: v_mul_lo_u32 v12, v8, v1 1621; CGP-NEXT: v_mul_lo_u32 v13, v9, v1 1622; CGP-NEXT: v_mul_hi_u32 v14, v8, v1 1623; CGP-NEXT: v_mul_hi_u32 v1, v9, v1 1624; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 1625; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1626; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 1627; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1628; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 1629; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1630; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 1631; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1632; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 1633; CGP-NEXT: v_add_i32_e32 v6, vcc, v13, v6 1634; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 1635; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1636; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 1637; CGP-NEXT: v_mul_lo_u32 v6, v2, v0 1638; CGP-NEXT: v_mul_lo_u32 v12, v3, v0 1639; CGP-NEXT: v_mul_hi_u32 v13, v2, v0 1640; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 1641; CGP-NEXT: v_mul_lo_u32 v4, v2, v1 1642; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v0 1643; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v1, vcc 1644; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 1645; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v14 1646; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc 1647; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 1648; CGP-NEXT: v_sub_i32_e32 v6, vcc, v8, v6 1649; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v9, v4, vcc 1650; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v9, v4 1651; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2 1652; CGP-NEXT: v_cndmask_b32_e64 
v9, 0, -1, s[4:5] 1653; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v3 1654; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] 1655; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v3, vcc 1656; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v3 1657; CGP-NEXT: v_cndmask_b32_e32 v8, v13, v9, vcc 1658; CGP-NEXT: v_sub_i32_e32 v6, vcc, v6, v2 1659; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc 1660; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2 1661; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 1662; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v3 1663; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc 1664; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 1665; CGP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc 1666; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 1667; CGP-NEXT: v_cndmask_b32_e32 v2, v14, v12, vcc 1668; CGP-NEXT: v_cndmask_b32_e32 v3, v15, v16, vcc 1669; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 1670; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1671; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1672; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 1673; CGP-NEXT: ; implicit-def: $vgpr8 1674; CGP-NEXT: BB8_2: ; %Flow2 1675; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] 1676; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] 1677; CGP-NEXT: s_cbranch_execz BB8_4 1678; CGP-NEXT: ; %bb.3: 1679; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2 1680; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2 1681; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 1682; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1683; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 1684; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 1685; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 1686; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 1687; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 1688; CGP-NEXT: v_mul_lo_u32 v1, v0, v2 1689; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0 1690; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v1 1691; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 1692; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1693; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v2 1694; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1695; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0 1696; CGP-NEXT: 
v_cmp_ge_u32_e32 vcc, v1, v2 1697; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1698; CGP-NEXT: v_mov_b32_e32 v1, 0 1699; CGP-NEXT: BB8_4: 1700; CGP-NEXT: s_or_b64 exec, exec, s[6:7] 1701; CGP-NEXT: v_or_b32_e32 v3, v7, v11 1702; CGP-NEXT: v_mov_b32_e32 v2, 0 1703; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 1704; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 1705; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 1706; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 1707; CGP-NEXT: s_cbranch_execz BB8_6 1708; CGP-NEXT: ; %bb.5: 1709; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10 1710; CGP-NEXT: v_cvt_f32_u32_e32 v3, v11 1711; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v10 1712; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v11, vcc 1713; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 1714; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 1715; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 1716; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 1717; CGP-NEXT: v_trunc_f32_e32 v3, v3 1718; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 1719; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 1720; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 1721; CGP-NEXT: v_mul_lo_u32 v8, v4, v3 1722; CGP-NEXT: v_mul_lo_u32 v9, v4, v2 1723; CGP-NEXT: v_mul_lo_u32 v12, v6, v2 1724; CGP-NEXT: v_mul_hi_u32 v13, v4, v2 1725; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8 1726; CGP-NEXT: v_mul_lo_u32 v12, v3, v9 1727; CGP-NEXT: v_mul_hi_u32 v14, v2, v9 1728; CGP-NEXT: v_mul_hi_u32 v9, v3, v9 1729; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v13 1730; CGP-NEXT: v_mul_lo_u32 v13, v2, v8 1731; CGP-NEXT: v_mul_lo_u32 v15, v3, v8 1732; CGP-NEXT: v_mul_hi_u32 v16, v2, v8 1733; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 1734; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 1735; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1736; CGP-NEXT: v_add_i32_e32 v9, vcc, v15, v9 1737; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1738; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 1739; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1740; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v16 1741; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1742; CGP-NEXT: 
v_add_i32_e32 v12, vcc, v13, v12 1743; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 1744; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 1745; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1746; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 1747; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 1748; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v9 1749; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], v3, v8, vcc 1750; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v3, v8 1751; CGP-NEXT: v_mul_lo_u32 v8, v4, v2 1752; CGP-NEXT: v_mul_lo_u32 v6, v6, v2 1753; CGP-NEXT: v_mul_hi_u32 v12, v4, v2 1754; CGP-NEXT: v_mul_lo_u32 v4, v4, v9 1755; CGP-NEXT: v_mul_lo_u32 v13, v9, v8 1756; CGP-NEXT: v_mul_hi_u32 v14, v2, v8 1757; CGP-NEXT: v_mul_hi_u32 v8, v9, v8 1758; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v6, v4 1759; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v12 1760; CGP-NEXT: v_mul_lo_u32 v6, v2, v4 1761; CGP-NEXT: v_mul_lo_u32 v12, v9, v4 1762; CGP-NEXT: v_mul_hi_u32 v15, v2, v4 1763; CGP-NEXT: v_mul_hi_u32 v4, v9, v4 1764; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v13, v6 1765; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 1766; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v12, v8 1767; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 1768; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v14 1769; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 1770; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v15 1771; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 1772; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v9, v6 1773; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v12, v13 1774; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v8, v6 1775; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 1776; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 1777; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v8 1778; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc 1779; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 1780; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 1781; CGP-NEXT: v_mul_lo_u32 v4, v7, v2 1782; CGP-NEXT: v_mul_hi_u32 v6, v5, v2 1783; CGP-NEXT: v_mul_hi_u32 v2, v7, v2 1784; CGP-NEXT: v_mul_lo_u32 v8, v5, v3 1785; CGP-NEXT: 
v_mul_lo_u32 v9, v7, v3 1786; CGP-NEXT: v_mul_hi_u32 v12, v5, v3 1787; CGP-NEXT: v_mul_hi_u32 v3, v7, v3 1788; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 1789; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1790; CGP-NEXT: v_add_i32_e32 v2, vcc, v9, v2 1791; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1792; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 1793; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1794; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 1795; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1796; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 1797; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 1798; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 1799; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1800; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 1801; CGP-NEXT: v_mul_lo_u32 v6, v10, v2 1802; CGP-NEXT: v_mul_lo_u32 v8, v11, v2 1803; CGP-NEXT: v_mul_hi_u32 v9, v10, v2 1804; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 1805; CGP-NEXT: v_mul_lo_u32 v4, v10, v3 1806; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v2 1807; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc 1808; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 1809; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v12 1810; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc 1811; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 1812; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v6 1813; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v7, v4, vcc 1814; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v7, v4 1815; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v10 1816; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] 1817; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v11 1818; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 1819; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v11, vcc 1820; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v11 1821; CGP-NEXT: v_cndmask_b32_e32 v6, v9, v7, vcc 1822; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v10 1823; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc 1824; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v10 1825; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 1826; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v11 1827; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 
-1, vcc 1828; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v11 1829; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v5, vcc 1830; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 1831; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v8, vcc 1832; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc 1833; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 1834; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 1835; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 1836; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11 1837; CGP-NEXT: ; implicit-def: $vgpr5 1838; CGP-NEXT: BB8_6: ; %Flow 1839; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] 1840; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] 1841; CGP-NEXT: s_cbranch_execz BB8_8 1842; CGP-NEXT: ; %bb.7: 1843; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10 1844; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v10 1845; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 1846; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 1847; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 1848; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 1849; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 1850; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 1851; CGP-NEXT: v_mul_hi_u32 v2, v5, v2 1852; CGP-NEXT: v_mul_lo_u32 v3, v2, v10 1853; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 1854; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v3 1855; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v10 1856; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 1857; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v10 1858; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 1859; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 1860; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v10 1861; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 1862; CGP-NEXT: v_mov_b32_e32 v3, 0 1863; CGP-NEXT: BB8_8: 1864; CGP-NEXT: s_or_b64 exec, exec, s[6:7] 1865; CGP-NEXT: s_setpc_b64 s[30:31] 1866 %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y 1867 %r = udiv <2 x i64> %x, %shl.y 1868 ret <2 x i64> %r 1869} 1870 1871define i64 @v_udiv_i64_24bit(i64 %num, i64 %den) { 1872; GISEL-LABEL: v_udiv_i64_24bit: 1873; GISEL: ; %bb.0: 1874; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1875; GISEL-NEXT: s_mov_b32 s4, 0xffffff
; v_udiv_i64_24bit - the IR body of this function masks %num and %den with 16777215 (0xffffff) before the i64 udiv, so both operands fit in 24 bits.
; The GISEL checks use a 32-bit integer reciprocal sequence (v_rcp_iflag_f32 + v_mul_hi_u32); the CGP checks use a float sequence (v_rcp_f32 + v_mad_f32).
; Both variants finish with v_mov_b32 v1, 0 (presumably the high half of the i64 result - confirm against the calling convention).
1876; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 1877; GISEL-NEXT: v_and_b32_e32 v1, s4, v2 1878; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 1879; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 1880; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 1881; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 1882; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 1883; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 1884; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 1885; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 1886; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2 1887; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 1888; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2 1889; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 1890; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 1891; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 1892; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1 1893; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1894; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2 1895; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 1896; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 1897; GISEL-NEXT: v_mov_b32_e32 v1, 0 1898; GISEL-NEXT: s_setpc_b64 s[30:31] 1899; 1900; CGP-LABEL: v_udiv_i64_24bit: 1901; CGP: ; %bb.0: 1902; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1903; CGP-NEXT: s_mov_b32 s4, 0xffffff 1904; CGP-NEXT: v_and_b32_e32 v0, s4, v0 1905; CGP-NEXT: v_and_b32_e32 v1, s4, v2 1906; CGP-NEXT: v_cvt_f32_u32_e32 v0, v0 1907; CGP-NEXT: v_cvt_f32_u32_e32 v1, v1 1908; CGP-NEXT: v_rcp_f32_e32 v2, v1 1909; CGP-NEXT: v_mul_f32_e32 v2, v0, v2 1910; CGP-NEXT: v_trunc_f32_e32 v2, v2 1911; CGP-NEXT: v_mad_f32 v0, -v2, v1, v0 1912; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 1913; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v1 1914; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1915; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0 1916; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1917; CGP-NEXT: v_mov_b32_e32 v1, 0 1918; CGP-NEXT: s_setpc_b64 s[30:31] 1919 %num.mask = and i64 %num, 16777215 1920 %den.mask = and i64 %den, 16777215 1921 %result = udiv i64 %num.mask, %den.mask 1922 ret i64 %result 1923} 1924
; v_udiv_v2i64_24bit - vector form of the 24-bit test: the IR body masks every lane of %num and %den with 16777215 (0xffffff) before the <2 x i64> udiv.
; The CGP checks show a short float-based sequence per lane (v_rcp_f32 / v_mad_f32), ending with the high result registers v1 and v3 set to 0.
; The GISEL checks instead show a long 64-bit-style expansion that multiplies by literal 0 operands (e.g. v_mul_lo_u32 v8, 0, v4).
; Presumably the GlobalISel path does not narrow the vector case to 24 bits - TODO confirm whether this is a known missed optimization.
; The assertions below are autogenerated (see the file header about utils/update_llc_test_checks.py); regenerate them instead of editing by hand.
1925define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { 1926; GISEL-LABEL: v_udiv_v2i64_24bit: 1927; GISEL: ; %bb.0: 1928; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1929; GISEL-NEXT: s_mov_b32 s6, 0xffffff 1930; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v7, 0 1931; GISEL-NEXT: v_and_b32_e32 v1, s6, v4 1932; GISEL-NEXT: v_and_b32_e32 v3, s6, v6 1933; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v1 1934; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1 1935; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, vcc 1936; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3 1937; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3 1938; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc 1939; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7 1940; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v7 1941; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 1942; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v8 1943; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 1944; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 1945; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 1946; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7 1947; GISEL-NEXT: v_trunc_f32_e32 v8, v8 1948; GISEL-NEXT: v_trunc_f32_e32 v11, v11 1949; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 1950; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 1951; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v11 1952; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 1953; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 1954; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 1955; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 1956; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11 1957; GISEL-NEXT: v_mul_lo_u32 v14, v4, v6 1958; GISEL-NEXT: v_mul_lo_u32 v15, v5, v6 1959; GISEL-NEXT: v_mul_hi_u32 v16, v4, v6 1960; GISEL-NEXT: v_mul_lo_u32 v17, v9, v7 1961; GISEL-NEXT: v_mul_lo_u32 v18, v10, v7 1962; GISEL-NEXT: v_mul_hi_u32 v19, v9, v7 1963; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 1964; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13 1965; GISEL-NEXT: v_mul_lo_u32 v15, v11, v17 1966; GISEL-NEXT: v_mul_hi_u32 v18, v7, v17 1967; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13,
v19 1968; GISEL-NEXT: v_mul_lo_u32 v19, v7, v13 1969; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 1970; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 1971; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v18 1972; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14 1973; GISEL-NEXT: v_mul_hi_u32 v18, v6, v14 1974; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14 1975; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17 1976; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v16 1977; GISEL-NEXT: v_mul_lo_u32 v16, v6, v12 1978; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v16 1979; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] 1980; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 1981; GISEL-NEXT: v_mul_lo_u32 v15, v8, v12 1982; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] 1983; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18 1984; GISEL-NEXT: v_mul_hi_u32 v18, v6, v12 1985; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 1986; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 1987; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v18 1988; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] 1989; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 1990; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1991; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18 1992; GISEL-NEXT: v_mul_lo_u32 v19, v11, v13 1993; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 1994; GISEL-NEXT: v_mul_hi_u32 v19, v7, v13 1995; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc 1996; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 1997; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 1998; GISEL-NEXT: v_add_i32_e32 v19, vcc, v20, v19 1999; GISEL-NEXT: s_bfe_i32 s10, -1, 0x10000 2000; GISEL-NEXT: s_bfe_i32 s11, -1, 0x10000 2001; GISEL-NEXT: s_bfe_i32 s12, -1, 0x10000 2002; GISEL-NEXT: s_bfe_i32 s13, -1, 0x10000 2003; GISEL-NEXT: v_and_b32_e32 v0, s6, v0 2004; GISEL-NEXT: v_and_b32_e32 v2, s6, v2 2005; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 2006; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 2007; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 2008; GISEL-NEXT: v_cndmask_b32_e64 v16, 0,
1, vcc 2009; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 2010; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 2011; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 2012; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18 2013; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 2014; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 2015; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 2016; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v8, v12, vcc 2017; GISEL-NEXT: v_mul_lo_u32 v15, v4, v6 2018; GISEL-NEXT: v_mul_lo_u32 v5, v5, v6 2019; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v7, v17 2020; GISEL-NEXT: v_addc_u32_e64 v16, s[6:7], v11, v13, s[4:5] 2021; GISEL-NEXT: v_mul_lo_u32 v17, v9, v7 2022; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7 2023; GISEL-NEXT: v_mul_hi_u32 v18, v9, v7 2024; GISEL-NEXT: v_mul_lo_u32 v9, v9, v16 2025; GISEL-NEXT: v_mul_lo_u32 v19, v16, v17 2026; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v10, v9 2027; GISEL-NEXT: v_mul_hi_u32 v10, v7, v17 2028; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v18 2029; GISEL-NEXT: v_mul_lo_u32 v18, v7, v9 2030; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v19, v18 2031; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 2032; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v18, v10 2033; GISEL-NEXT: v_mul_hi_u32 v10, v4, v6 2034; GISEL-NEXT: v_mul_lo_u32 v4, v4, v14 2035; GISEL-NEXT: v_mul_lo_u32 v18, v14, v15 2036; GISEL-NEXT: v_add_i32_e64 v4, s[8:9], v5, v4 2037; GISEL-NEXT: v_mul_hi_u32 v5, v6, v15 2038; GISEL-NEXT: v_add_i32_e64 v4, s[8:9], v4, v10 2039; GISEL-NEXT: v_mul_lo_u32 v10, v6, v4 2040; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v18, v10 2041; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 2042; GISEL-NEXT: v_add_i32_e64 v5, s[8:9], v10, v5 2043; GISEL-NEXT: v_mov_b32_e32 v5, s10 2044; GISEL-NEXT: v_mov_b32_e32 v10, s11 2045; GISEL-NEXT: v_add_i32_e64 v8, s[10:11], v8, v12 2046; GISEL-NEXT: v_mov_b32_e32 v12, s12 2047; GISEL-NEXT: v_add_i32_e64 v11, s[10:11], v11, v13 2048; GISEL-NEXT: v_mul_hi_u32 v13, v14, v15 2049; GISEL-NEXT: v_mul_hi_u32 v15, v16, v17 2050;
GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] 2051; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17 2052; GISEL-NEXT: v_mul_lo_u32 v18, v14, v4 2053; GISEL-NEXT: v_mul_hi_u32 v14, v14, v4 2054; GISEL-NEXT: v_mul_hi_u32 v4, v6, v4 2055; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v18, v13 2056; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 2057; GISEL-NEXT: v_add_i32_e64 v4, s[8:9], v13, v4 2058; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[8:9] 2059; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v18, v13 2060; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] 2061; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v19, v18 2062; GISEL-NEXT: v_mul_lo_u32 v19, v16, v9 2063; GISEL-NEXT: v_mul_hi_u32 v16, v16, v9 2064; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 2065; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v19, v15 2066; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 2067; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v15, v9 2068; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] 2069; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v19, v15 2070; GISEL-NEXT: v_mov_b32_e32 v19, s13 2071; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v4, v17 2072; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] 2073; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v18 2074; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] 2075; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v13, v17 2076; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 2077; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v14, v13 2078; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v16, v15 2079; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v13, vcc 2080; GISEL-NEXT: v_addc_u32_e64 v11, vcc, v11, v14, s[4:5] 2081; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 2082; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v8, vcc 2083; GISEL-NEXT: v_mul_lo_u32 v8, 0, v4 2084; GISEL-NEXT: v_mul_hi_u32 v13, v0, v4 2085; GISEL-NEXT: v_mul_hi_u32 v4, 0, v4 2086; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 2087; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc 2088; GISEL-NEXT: v_mul_lo_u32 v11, 0, v7 2089; GISEL-NEXT: v_mul_hi_u32 v14, v2,
v7 2090; GISEL-NEXT: v_mul_hi_u32 v7, 0, v7 2091; GISEL-NEXT: v_mul_lo_u32 v15, v0, v6 2092; GISEL-NEXT: v_mul_lo_u32 v16, 0, v6 2093; GISEL-NEXT: v_mul_hi_u32 v17, v0, v6 2094; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6 2095; GISEL-NEXT: v_mul_lo_u32 v18, v2, v9 2096; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18 2097; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 2098; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 2099; GISEL-NEXT: v_mul_lo_u32 v11, 0, v9 2100; GISEL-NEXT: v_mul_hi_u32 v14, v2, v9 2101; GISEL-NEXT: v_mul_hi_u32 v9, 0, v9 2102; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v15 2103; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 2104; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v16, v4 2105; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] 2106; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 2107; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 2108; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v13 2109; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 2110; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v17 2111; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 2112; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 2113; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v14 2114; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2115; GISEL-NEXT: v_add_i32_e32 v8, vcc, v15, v8 2116; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 2117; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v17 2118; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 2119; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 2120; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2121; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v15 2122; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2123; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8 2124; GISEL-NEXT: v_mul_lo_u32 v13, v1, v4 2125; GISEL-NEXT: v_mul_lo_u32 v15, 0, v4 2126; GISEL-NEXT: v_mul_hi_u32 v16, v1, v4 2127; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 2128; GISEL-NEXT: v_mul_lo_u32 v14, v3, v7 2129; GISEL-NEXT: v_mul_lo_u32 v17, 0, v7 2130; GISEL-NEXT: v_mul_hi_u32 v18, v3, v7 2131; GISEL-NEXT:
v_add_i32_e32 v6, vcc, v6, v8 2132; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v11 2133; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6 2134; GISEL-NEXT: v_mul_lo_u32 v11, v3, v8 2135; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 2136; GISEL-NEXT: v_add_i32_e32 v11, vcc, v17, v11 2137; GISEL-NEXT: v_add_i32_e32 v15, vcc, 1, v4 2138; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v6, vcc 2139; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16 2140; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18 2141; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v13 2142; GISEL-NEXT: v_subb_u32_e64 v13, s[4:5], 0, v9, vcc 2143; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 2144; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[4:5] 2145; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13 2146; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], 1, v7 2147; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, v8, s[6:7] 2148; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v14 2149; GISEL-NEXT: v_subb_u32_e64 v14, s[8:9], 0, v11, s[6:7] 2150; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v16, s[4:5] 2151; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3 2152; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[4:5] 2153; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 2154; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, v16, s[4:5] 2155; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], 1, v15 2156; GISEL-NEXT: v_addc_u32_e64 v16, s[4:5], 0, v17, s[4:5] 2157; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], 0, v11 2158; GISEL-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v11, s[6:7] 2159; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 2160; GISEL-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v11, s[4:5] 2161; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3 2162; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], 1, v13 2163; GISEL-NEXT: v_addc_u32_e64 v3, s[6:7], 0, v18, s[6:7] 2164; GISEL-NEXT: v_sub_i32_e64 v9, s[6:7], 0, v9 2165; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc 2166; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 2167; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc 2168; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 2169; GISEL-NEXT:
v_cndmask_b32_e64 v0, 0, -1, vcc 2170; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] 2171; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 2172; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc 2173; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11 2174; GISEL-NEXT: v_cndmask_b32_e32 v1, v19, v1, vcc 2175; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 2176; GISEL-NEXT: v_cndmask_b32_e32 v0, v15, v14, vcc 2177; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v1 2178; GISEL-NEXT: v_cndmask_b32_e64 v1, v13, v2, s[4:5] 2179; GISEL-NEXT: v_cndmask_b32_e32 v9, v17, v16, vcc 2180; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 2181; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 2182; GISEL-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[4:5] 2183; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v12 2184; GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v1, s[4:5] 2185; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v9, vcc 2186; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5] 2187; GISEL-NEXT: s_setpc_b64 s[30:31] 2188; 2189; CGP-LABEL: v_udiv_v2i64_24bit: 2190; CGP: ; %bb.0: 2191; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2192; CGP-NEXT: s_mov_b32 s6, 0xffffff 2193; CGP-NEXT: v_and_b32_e32 v0, s6, v0 2194; CGP-NEXT: v_and_b32_e32 v1, s6, v2 2195; CGP-NEXT: v_and_b32_e32 v2, s6, v4 2196; CGP-NEXT: v_and_b32_e32 v3, s6, v6 2197; CGP-NEXT: v_cvt_f32_u32_e32 v0, v0 2198; CGP-NEXT: v_cvt_f32_u32_e32 v2, v2 2199; CGP-NEXT: v_cvt_f32_u32_e32 v1, v1 2200; CGP-NEXT: v_cvt_f32_u32_e32 v3, v3 2201; CGP-NEXT: v_rcp_f32_e32 v4, v2 2202; CGP-NEXT: v_rcp_f32_e32 v5, v3 2203; CGP-NEXT: v_mul_f32_e32 v4, v0, v4 2204; CGP-NEXT: v_mul_f32_e32 v5, v1, v5 2205; CGP-NEXT: v_trunc_f32_e32 v4, v4 2206; CGP-NEXT: v_trunc_f32_e32 v5, v5 2207; CGP-NEXT: v_mad_f32 v0, -v4, v2, v0 2208; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 2209; CGP-NEXT: v_mad_f32 v1, -v5, v3, v1 2210; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 2211; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2 2212; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2213; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, v3 2214;
CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] 2215; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0 2216; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1 2217; CGP-NEXT: v_and_b32_e32 v0, s6, v0 2218; CGP-NEXT: v_and_b32_e32 v2, s6, v1 2219; CGP-NEXT: v_mov_b32_e32 v1, 0 2220; CGP-NEXT: v_mov_b32_e32 v3, 0 2221; CGP-NEXT: s_setpc_b64 s[30:31] 2222 %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215> 2223 %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215> 2224 %result = udiv <2 x i64> %num.mask, %den.mask 2225 ret <2 x i64> %result 2226} 2227