; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s

; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.

; Signed i32 division where both the numerator and the denominator are
; function arguments. The first run line above passes
; -amdgpu-codegenprepare-disable-idiv-expansion=1 and is checked with the
; GISEL prefix; the second passes =0 and is checked with the CGP prefix.
; The assertions below are tool-generated: refresh them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @v_sdiv_i32(i32 %num, i32 %den) {
; GISEL-LABEL: v_sdiv_i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v1
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v1
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v1
; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v4
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
; GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_sdiv_i32:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; CGP-NEXT:    v_xor_b32_e32 v4, v2, v3
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v3
; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
; CGP-NEXT:    v_mul_lo_u32 v5, v0, 0
; CGP-NEXT:    v_rcp_f32_e32 v2, v2
; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
; CGP-NEXT:    v_mul_lo_u32 v6, v2, 0
; CGP-NEXT:    v_mul_lo_u32 v7, 0, v3
; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
; CGP-NEXT:    v_mul_lo_u32 v3, v2, v1
; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = sdiv i32 %num, %den
  ret i32 %result
}

; FIXME: This is a workaround for not handling uniform VGPR case.
86declare i32 @llvm.amdgcn.readfirstlane(i32) 87 88define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) { 89; GISEL-LABEL: s_sdiv_i32: 90; GISEL: ; %bb.0: 91; GISEL-NEXT: s_ashr_i32 s2, s0, 31 92; GISEL-NEXT: s_ashr_i32 s3, s1, 31 93; GISEL-NEXT: s_add_i32 s0, s0, s2 94; GISEL-NEXT: s_add_i32 s1, s1, s3 95; GISEL-NEXT: s_xor_b32 s0, s0, s2 96; GISEL-NEXT: s_xor_b32 s4, s1, s3 97; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s4 98; GISEL-NEXT: s_sub_i32 s1, 0, s4 99; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 100; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 101; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 102; GISEL-NEXT: v_mul_lo_u32 v1, s1, v0 103; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1 104; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 105; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0 106; GISEL-NEXT: v_mul_lo_u32 v1, v0, s4 107; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v0 108; GISEL-NEXT: v_sub_i32_e32 v1, vcc, s0, v1 109; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 110; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 111; GISEL-NEXT: v_subrev_i32_e64 v2, s[0:1], s4, v1 112; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 113; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v0 114; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 115; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 116; GISEL-NEXT: s_xor_b32 s0, s2, s3 117; GISEL-NEXT: v_xor_b32_e32 v0, s0, v0 118; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0 119; GISEL-NEXT: v_readfirstlane_b32 s0, v0 120; GISEL-NEXT: ; return to shader part epilog 121; 122; CGP-LABEL: s_sdiv_i32: 123; CGP: ; %bb.0: 124; CGP-NEXT: s_ashr_i32 s2, s0, 31 125; CGP-NEXT: s_ashr_i32 s3, s1, 31 126; CGP-NEXT: s_xor_b32 s4, s2, s3 127; CGP-NEXT: s_add_i32 s0, s0, s2 128; CGP-NEXT: s_add_i32 s1, s1, s3 129; CGP-NEXT: s_xor_b32 s0, s0, s2 130; CGP-NEXT: s_xor_b32 s5, s1, s3 131; CGP-NEXT: v_cvt_f32_u32_e32 v0, s5 132; CGP-NEXT: s_sub_i32 s1, 0, s5 133; CGP-NEXT: s_bfe_u64 s[2:3], s[0:1], 0x200000 134; CGP-NEXT: v_rcp_f32_e32 v0, v0 135; CGP-NEXT: v_mul_lo_u32 v1, s2, 0 136; CGP-NEXT: 
v_mul_f32_e32 v0, 0x4f7ffffe, v0 137; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 138; CGP-NEXT: v_mul_lo_u32 v2, s1, v0 139; CGP-NEXT: v_mul_lo_u32 v3, v0, 0 140; CGP-NEXT: v_mul_lo_u32 v4, 0, v2 141; CGP-NEXT: v_mul_hi_u32 v2, v0, v2 142; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 143; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 144; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 145; CGP-NEXT: v_mul_lo_u32 v2, s3, v0 146; CGP-NEXT: v_mul_hi_u32 v0, s2, v0 147; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1 148; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 149; CGP-NEXT: v_mul_lo_u32 v1, v0, s5 150; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 151; CGP-NEXT: v_sub_i32_e32 v1, vcc, s0, v1 152; CGP-NEXT: v_cmp_le_u32_e32 vcc, s5, v1 153; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 154; CGP-NEXT: v_subrev_i32_e64 v2, s[0:1], s5, v1 155; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 156; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 157; CGP-NEXT: v_cmp_le_u32_e32 vcc, s5, v1 158; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 159; CGP-NEXT: v_xor_b32_e32 v0, s4, v0 160; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0 161; CGP-NEXT: v_readfirstlane_b32 s0, v0 162; CGP-NEXT: ; return to shader part epilog 163 %result = sdiv i32 %num, %den 164 %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result) 165 ret i32 %readlane 166} 167 168define <2 x i32> @v_sdiv_v2i32(<2 x i32> %num, <2 x i32> %den) { 169; GISEL-LABEL: v_sdiv_v2i32: 170; GISEL: ; %bb.0: 171; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 172; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0 173; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v2 174; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1 175; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3 176; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 177; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5 178; GISEL-NEXT: v_xor_b32_e32 v8, v4, v5 179; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6 180; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 181; GISEL-NEXT: v_xor_b32_e32 v9, v6, v7 182; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 183; GISEL-NEXT: 
v_xor_b32_e32 v2, v2, v5 184; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6 185; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7 186; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 187; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 188; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3 189; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 190; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 191; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 192; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 193; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 194; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 195; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 196; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4 197; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6 198; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 199; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 200; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 201; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7 202; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4 203; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 204; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 205; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4 206; GISEL-NEXT: v_mul_lo_u32 v10, v5, v3 207; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v5 208; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 209; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v10 210; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 211; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc 212; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2 213; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 214; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5] 215; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3 216; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 217; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4 218; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5] 219; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5 220; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 221; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc 222; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 223; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc 224; GISEL-NEXT: v_xor_b32_e32 v0, v0, v8 225; GISEL-NEXT: v_xor_b32_e32 v1, v1, v9 226; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 227; 
GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v9 228; GISEL-NEXT: s_setpc_b64 s[30:31] 229; 230; CGP-LABEL: v_sdiv_v2i32: 231; CGP: ; %bb.0: 232; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 233; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0 234; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v2 235; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1 236; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 237; CGP-NEXT: v_xor_b32_e32 v8, v4, v5 238; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 239; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5 240; CGP-NEXT: v_xor_b32_e32 v9, v6, v7 241; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6 242; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 243; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 244; CGP-NEXT: v_xor_b32_e32 v2, v2, v5 245; CGP-NEXT: v_xor_b32_e32 v1, v1, v6 246; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 247; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 248; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 249; CGP-NEXT: v_mul_lo_u32 v6, v0, 0 250; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 251; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v3 252; CGP-NEXT: v_mul_lo_u32 v11, v1, 0 253; CGP-NEXT: v_rcp_f32_e32 v4, v4 254; CGP-NEXT: v_rcp_f32_e32 v7, v7 255; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 256; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 257; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 258; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 259; CGP-NEXT: v_mul_lo_u32 v5, v5, v4 260; CGP-NEXT: v_mul_lo_u32 v12, v4, 0 261; CGP-NEXT: v_mul_lo_u32 v10, v10, v7 262; CGP-NEXT: v_mul_lo_u32 v13, v7, 0 263; CGP-NEXT: v_mul_lo_u32 v14, 0, v5 264; CGP-NEXT: v_mul_hi_u32 v5, v4, v5 265; CGP-NEXT: v_mul_lo_u32 v15, 0, v10 266; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 267; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 268; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 269; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 270; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 271; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 272; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v10 273; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 274; CGP-NEXT: v_mul_hi_u32 v4, v0, v4 275; CGP-NEXT: v_mul_lo_u32 v10, 0, v5 276; 
CGP-NEXT: v_mul_hi_u32 v5, v1, v5 277; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 278; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v11 279; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 280; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 281; CGP-NEXT: v_mul_lo_u32 v6, v4, v2 282; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v4 283; CGP-NEXT: v_mul_lo_u32 v10, v5, v3 284; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v5 285; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 286; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v10 287; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 288; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc 289; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2 290; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 291; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5] 292; CGP-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3 293; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 294; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v4 295; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5] 296; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v5 297; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 298; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc 299; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 300; CGP-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc 301; CGP-NEXT: v_xor_b32_e32 v0, v0, v8 302; CGP-NEXT: v_xor_b32_e32 v1, v1, v9 303; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 304; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9 305; CGP-NEXT: s_setpc_b64 s[30:31] 306 %result = sdiv <2 x i32> %num, %den 307 ret <2 x i32> %result 308} 309 310define i32 @v_sdiv_i32_pow2k_denom(i32 %num) { 311; CHECK-LABEL: v_sdiv_i32_pow2k_denom: 312; CHECK: ; %bb.0: 313; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 314; CHECK-NEXT: s_movk_i32 s6, 0x1000 315; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0 316; CHECK-NEXT: v_mov_b32_e32 v2, 0xfffff000 317; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 318; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 319; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 320; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 321; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 322; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 323; 
CHECK-NEXT: v_mul_lo_u32 v2, v2, v3 324; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2 325; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 326; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2 327; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2 328; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v2 329; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 330; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 331; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 332; CHECK-NEXT: v_subrev_i32_e64 v3, s[4:5], s6, v0 333; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 334; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2 335; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 336; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 337; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 338; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 339; CHECK-NEXT: s_setpc_b64 s[30:31] 340 %result = sdiv i32 %num, 4096 341 ret i32 %result 342} 343 344define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) { 345; GISEL-LABEL: v_sdiv_v2i32_pow2k_denom: 346; GISEL: ; %bb.0: 347; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 348; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0 349; GISEL-NEXT: s_add_i32 s8, 0x1000, 0 350; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1 351; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 352; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s8 353; GISEL-NEXT: s_sub_i32 s4, 0, s8 354; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 355; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 356; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 357; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 358; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v4 359; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 360; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 361; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 362; GISEL-NEXT: v_mul_lo_u32 v6, s4, v5 363; GISEL-NEXT: v_mul_lo_u32 v7, s4, v4 364; GISEL-NEXT: v_mul_hi_u32 v6, v5, v6 365; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7 366; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 367; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 368; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5 369; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4 370; GISEL-NEXT: 
v_mul_lo_u32 v6, v5, s8 371; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5 372; GISEL-NEXT: v_mul_lo_u32 v8, v4, s8 373; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 374; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 375; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v8 376; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 377; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc 378; GISEL-NEXT: v_subrev_i32_e64 v6, s[4:5], s8, v0 379; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v1 380; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[4:5] 381; GISEL-NEXT: v_subrev_i32_e64 v7, s[6:7], s8, v1 382; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 383; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v5 384; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5] 385; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4 386; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 387; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v6, vcc 388; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v1 389; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v7, vcc 390; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 391; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 392; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 393; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 394; GISEL-NEXT: s_setpc_b64 s[30:31] 395; 396; CGP-LABEL: v_sdiv_v2i32_pow2k_denom: 397; CGP: ; %bb.0: 398; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 399; CGP-NEXT: s_movk_i32 s4, 0x1000 400; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0 401; CGP-NEXT: v_mov_b32_e32 v3, 0x1000 402; CGP-NEXT: s_mov_b32 s5, 0xfffff000 403; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000 404; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 405; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 406; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4 407; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 408; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 409; CGP-NEXT: v_xor_b32_e32 v0, v0, v2 410; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6 411; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 412; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7 413; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 414; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 415; CGP-NEXT: v_cvt_u32_f32_e32 
v6, v6 416; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 417; CGP-NEXT: v_mul_lo_u32 v8, s5, v6 418; CGP-NEXT: v_mul_lo_u32 v4, v4, v7 419; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 420; CGP-NEXT: v_mul_hi_u32 v4, v7, v4 421; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 422; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 423; CGP-NEXT: v_mul_hi_u32 v6, v0, v6 424; CGP-NEXT: v_mul_hi_u32 v4, v1, v4 425; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v6 426; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v6 427; CGP-NEXT: v_lshlrev_b32_e32 v9, 12, v4 428; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v4 429; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 430; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9 431; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 432; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc 433; CGP-NEXT: v_subrev_i32_e64 v7, s[4:5], s4, v0 434; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 435; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5] 436; CGP-NEXT: v_sub_i32_e64 v8, s[6:7], v1, v3 437; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 438; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v6 439; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[4:5] 440; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4 441; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 442; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc 443; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 444; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc 445; CGP-NEXT: v_xor_b32_e32 v0, v0, v2 446; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 447; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 448; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 449; CGP-NEXT: s_setpc_b64 s[30:31] 450 %result = sdiv <2 x i32> %num, <i32 4096, i32 4096> 451 ret <2 x i32> %result 452} 453 454define i32 @v_sdiv_i32_oddk_denom(i32 %num) { 455; CHECK-LABEL: v_sdiv_i32_oddk_denom: 456; CHECK: ; %bb.0: 457; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 458; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb 459; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0 460; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705 461; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 462; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 463; 
CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 464; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 465; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 466; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 467; CHECK-NEXT: v_mul_lo_u32 v2, v2, v3 468; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2 469; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 470; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2 471; CHECK-NEXT: v_mul_lo_u32 v3, v2, s6 472; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v2 473; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 474; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 475; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 476; CHECK-NEXT: v_subrev_i32_e64 v3, s[4:5], s6, v0 477; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 478; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2 479; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 480; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 481; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 482; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 483; CHECK-NEXT: s_setpc_b64 s[30:31] 484 %result = sdiv i32 %num, 1235195 485 ret i32 %result 486} 487 488define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) { 489; GISEL-LABEL: v_sdiv_v2i32_oddk_denom: 490; GISEL: ; %bb.0: 491; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 492; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0 493; GISEL-NEXT: s_add_i32 s8, 0x12d8fb, 0 494; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1 495; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 496; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s8 497; GISEL-NEXT: s_sub_i32 s4, 0, s8 498; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 499; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 500; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 501; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 502; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v4 503; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 504; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 505; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 506; GISEL-NEXT: v_mul_lo_u32 v6, s4, v5 507; GISEL-NEXT: v_mul_lo_u32 v7, s4, v4 508; GISEL-NEXT: v_mul_hi_u32 v6, v5, v6 509; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7 510; GISEL-NEXT: 
v_add_i32_e32 v5, vcc, v5, v6 511; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 512; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5 513; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4 514; GISEL-NEXT: v_mul_lo_u32 v6, v5, s8 515; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5 516; GISEL-NEXT: v_mul_lo_u32 v8, v4, s8 517; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 518; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 519; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v8 520; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 521; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc 522; GISEL-NEXT: v_subrev_i32_e64 v6, s[4:5], s8, v0 523; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v1 524; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[4:5] 525; GISEL-NEXT: v_subrev_i32_e64 v7, s[6:7], s8, v1 526; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 527; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v5 528; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5] 529; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4 530; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 531; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v6, vcc 532; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v1 533; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v7, vcc 534; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 535; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 536; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 537; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 538; GISEL-NEXT: s_setpc_b64 s[30:31] 539; 540; CGP-LABEL: v_sdiv_v2i32_oddk_denom: 541; CGP: ; %bb.0: 542; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 543; CGP-NEXT: s_mov_b32 s4, 0x12d8fb 544; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0 545; CGP-NEXT: v_mov_b32_e32 v3, 0x12d8fb 546; CGP-NEXT: s_mov_b32 s5, 0xffed2705 547; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705 548; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 549; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 550; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4 551; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 552; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 553; CGP-NEXT: v_xor_b32_e32 v0, v0, v2 554; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6 555; CGP-NEXT: v_xor_b32_e32 v1, 
v1, v5 556; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7 557; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 558; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 559; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 560; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 561; CGP-NEXT: v_mul_lo_u32 v8, s5, v6 562; CGP-NEXT: v_mul_lo_u32 v4, v4, v7 563; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 564; CGP-NEXT: v_mul_hi_u32 v4, v7, v4 565; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 566; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 567; CGP-NEXT: v_mul_hi_u32 v6, v0, v6 568; CGP-NEXT: v_mul_hi_u32 v4, v1, v4 569; CGP-NEXT: v_mul_lo_u32 v7, v6, s4 570; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v6 571; CGP-NEXT: v_mul_lo_u32 v9, v4, v3 572; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v4 573; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 574; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9 575; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 576; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc 577; CGP-NEXT: v_subrev_i32_e64 v7, s[4:5], s4, v0 578; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 579; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5] 580; CGP-NEXT: v_sub_i32_e64 v8, s[6:7], v1, v3 581; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 582; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v6 583; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[4:5] 584; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4 585; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 586; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc 587; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 588; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc 589; CGP-NEXT: v_xor_b32_e32 v0, v0, v2 590; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 591; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 592; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 593; CGP-NEXT: s_setpc_b64 s[30:31] 594 %result = sdiv <2 x i32> %num, <i32 1235195, i32 1235195> 595 ret <2 x i32> %result 596} 597 598define i32 @v_sdiv_i32_pow2_shl_denom(i32 %x, i32 %y) { 599; CHECK-LABEL: v_sdiv_i32_pow2_shl_denom: 600; CHECK: ; %bb.0: 601; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 602; CHECK-NEXT: v_lshl_b32_e32 v1, 
0x1000, v1 603; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0 604; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 605; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 606; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3 607; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 608; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 609; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v1 610; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v1 611; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4 612; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 613; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 614; CHECK-NEXT: v_mul_lo_u32 v5, v5, v4 615; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 616; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 617; CHECK-NEXT: v_mul_hi_u32 v4, v0, v4 618; CHECK-NEXT: v_mul_lo_u32 v5, v4, v1 619; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v4 620; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 621; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 622; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 623; CHECK-NEXT: v_sub_i32_e64 v5, s[4:5], v0, v1 624; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 625; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v4 626; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 627; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc 628; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3 629; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 630; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 631; CHECK-NEXT: s_setpc_b64 s[30:31] 632 %shl.y = shl i32 4096, %y 633 %r = sdiv i32 %x, %shl.y 634 ret i32 %r 635} 636 637define <2 x i32> @v_sdiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { 638; GISEL-LABEL: v_sdiv_v2i32_pow2_shl_denom: 639; GISEL: ; %bb.0: 640; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 641; GISEL-NEXT: s_movk_i32 s4, 0x1000 642; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0 643; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1 644; GISEL-NEXT: v_lshl_b32_e32 v2, s4, v2 645; GISEL-NEXT: v_lshl_b32_e32 v3, s4, v3 646; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 647; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 648; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v2 649; GISEL-NEXT: v_xor_b32_e32 v0, 
v0, v4 650; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3 651; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 652; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 653; GISEL-NEXT: v_xor_b32_e32 v4, v4, v6 654; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 655; GISEL-NEXT: v_xor_b32_e32 v5, v5, v7 656; GISEL-NEXT: v_xor_b32_e32 v2, v2, v6 657; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7 658; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v2 659; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2 660; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3 661; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3 662; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 663; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 664; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 665; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 666; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 667; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 668; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6 669; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8 670; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 671; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9 672; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 673; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9 674; GISEL-NEXT: v_mul_hi_u32 v6, v0, v6 675; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 676; GISEL-NEXT: v_mul_lo_u32 v8, v6, v2 677; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v6 678; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3 679; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v7 680; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 681; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v10 682; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 683; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc 684; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v0, v2 685; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 686; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v11, s[4:5] 687; GISEL-NEXT: v_sub_i32_e64 v9, s[6:7], v1, v3 688; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 689; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v6 690; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5] 691; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v7 692; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 693; GISEL-NEXT: v_cndmask_b32_e32 v0, 
v6, v8, vcc 694; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 695; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v9, vcc 696; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 697; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 698; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 699; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 700; GISEL-NEXT: s_setpc_b64 s[30:31] 701; 702; CGP-LABEL: v_sdiv_v2i32_pow2_shl_denom: 703; CGP: ; %bb.0: 704; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 705; CGP-NEXT: s_movk_i32 s4, 0x1000 706; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0 707; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 708; CGP-NEXT: v_lshl_b32_e32 v2, s4, v2 709; CGP-NEXT: v_lshl_b32_e32 v3, s4, v3 710; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 711; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 712; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v2 713; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 714; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 715; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 716; CGP-NEXT: v_xor_b32_e32 v4, v4, v6 717; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 718; CGP-NEXT: v_mul_lo_u32 v8, v0, 0 719; CGP-NEXT: v_xor_b32_e32 v5, v5, v7 720; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 721; CGP-NEXT: v_mul_lo_u32 v9, v1, 0 722; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 723; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 724; CGP-NEXT: v_cvt_f32_u32_e32 v6, v2 725; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2 726; CGP-NEXT: v_cvt_f32_u32_e32 v10, v3 727; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v3 728; CGP-NEXT: v_rcp_f32_e32 v6, v6 729; CGP-NEXT: v_rcp_f32_e32 v10, v10 730; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 731; CGP-NEXT: v_mul_f32_e32 v10, 0x4f7ffffe, v10 732; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 733; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10 734; CGP-NEXT: v_mul_lo_u32 v7, v7, v6 735; CGP-NEXT: v_mul_lo_u32 v12, v6, 0 736; CGP-NEXT: v_mul_lo_u32 v11, v11, v10 737; CGP-NEXT: v_mul_lo_u32 v13, v10, 0 738; CGP-NEXT: v_mul_lo_u32 v14, 0, v7 739; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 740; CGP-NEXT: v_mul_lo_u32 v15, 0, v11 741; CGP-NEXT: v_mul_hi_u32 v11, v10, v11 742; CGP-NEXT: 
v_add_i32_e32 v12, vcc, v14, v12
; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v11
; CGP-NEXT:    v_mul_lo_u32 v10, 0, v6
; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
; CGP-NEXT:    v_mul_lo_u32 v11, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT:    v_mul_lo_u32 v8, v6, v2
; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v6
; CGP-NEXT:    v_mul_lo_u32 v10, v7, v3
; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v7
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc
; CGP-NEXT:    v_sub_i32_e64 v8, s[4:5], v0, v2
; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s[4:5]
; CGP-NEXT:    v_sub_i32_e64 v9, s[6:7], v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v6
; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s[4:5]
; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v7
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v7, v9, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
  %r = sdiv <2 x i32> %x, %shl.y
  ret <2 x i32> %r
}

; Scalar i32 sdiv whose operands are both masked with 16777215 (0xffffff)
; before the division, so each value reaching the sdiv has its upper 8 bits,
; including the sign bit, cleared.
; In the expected output below, the checks with the GISEL prefix still contain
; the sign-handling sequence (v_ashrrev_i32 by 31, add, xor) around the
; unsigned expansion, while the checks with the CGP prefix contain only the
; mask followed by the unsigned expansion.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @v_sdiv_i32_24bit(i32 %num, i32 %den) {
; GISEL-LABEL: v_sdiv_i32_24bit:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v1
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v1
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v1
; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v4
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
; GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_sdiv_i32_24bit:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_mov_b32 s4, 0xffffff
; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
; CGP-NEXT:    v_mul_lo_u32 v4, v0, 0
; CGP-NEXT:    v_rcp_f32_e32 v2, v2
; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
; CGP-NEXT:    v_mul_lo_u32 v5, v2, 0
; CGP-NEXT:    v_mul_lo_u32 v6, 0, v3
; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
; CGP-NEXT:    v_mul_lo_u32 v3, v2, v1
; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %num.mask = and i32 %num, 16777215
  %den.mask = and i32 %den, 16777215
  %result = sdiv i32 %num.mask, %den.mask
  ret i32 %result
}

; <2 x i32> variant of the masked division: every lane of both operands is
; masked with 16777215 (0xffffff) before the vector sdiv.
; The expected output handles the two lanes as interleaved scalar sequences
; (numerator lanes in v0/v1, denominator lanes in v2/v3, results in v0/v1).
; As in the scalar case, only the checks with the GISEL prefix contain the
; v_ashrrev_i32 / xor sign-handling sequence.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
; GISEL-LABEL: v_sdiv_v2i32_24bit:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
; GISEL-NEXT:    v_and_b32_e32 v3, s4, v3
; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; GISEL-NEXT:    v_xor_b32_e32 v8, v4, v5
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
; GISEL-NEXT:    v_xor_b32_e32 v9, v6, v7
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v3
; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v5
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v8
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v9
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_sdiv_v2i32_24bit:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_mov_b32 s4, 0xffffff
; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
; CGP-NEXT:    v_and_b32_e32 v3, s4, v3
; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
; CGP-NEXT:    v_mul_lo_u32 v6, v0, 0
; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
; CGP-NEXT:    v_sub_i32_e32 v8, vcc, 0, v3
; CGP-NEXT:    v_mul_lo_u32 v9, v1, 0
; CGP-NEXT:    v_rcp_f32_e32 v4, v4
; CGP-NEXT:    v_rcp_f32_e32 v7, v7
; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
; CGP-NEXT:    v_mul_lo_u32 v10, v4, 0
; CGP-NEXT:    v_mul_lo_u32 v8, v8, v7
; CGP-NEXT:    v_mul_lo_u32 v11, v7, 0
; CGP-NEXT:    v_mul_lo_u32 v12, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
; CGP-NEXT:    v_mul_lo_u32 v13, 0, v8
; CGP-NEXT:    v_mul_hi_u32 v8, v7, v8
; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v8
; CGP-NEXT:    v_mul_lo_u32 v7, 0, v4
; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
  %result = sdiv <2 x i32> %num.mask, %den.mask
  ret <2 x i32> %result
}