; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s

; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
;
; Prefix usage in this file: the GISEL prefix belongs to the llc invocation
; that passes -amdgpu-codegenprepare-disable-idiv-expansion=1, the CGP prefix
; to the invocation that passes =0. Functions whose expected output is the
; same for both invocations use the shared prefix only.

; 32-bit unsigned division where numerator and denominator are both function
; arguments. The expected CGP output uses v_rcp_f32 (the GISEL output uses
; v_rcp_iflag_f32) and contains additional v_mul_lo_u32 by 0 / v_add_i32
; pairs that the GISEL output does not have.
define i32 @v_udiv_i32(i32 %num, i32 %den) {
; GISEL-LABEL: v_udiv_i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_i32:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
; CGP-NEXT:    v_rcp_f32_e32 v2, v2
; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
; CGP-NEXT:    v_mul_lo_u32 v3, v2, v1
; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = udiv i32 %num, %den
  ret i32 %result
}

; FIXME: This is a workaround for not handling uniform VGPR case.
declare i32 @llvm.amdgcn.readfirstlane(i32)

; amdgpu_ps variant with both operands marked inreg. The quotient is passed
; through llvm.amdgcn.readfirstlane before being returned; the expected
; output ends with v_readfirstlane_b32 s0, v0.
define amdgpu_ps i32 @s_udiv_i32(i32 inreg %num, i32 inreg %den) {
; GISEL-LABEL: s_udiv_i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s1
; GISEL-NEXT:    s_sub_i32 s2, 0, s1
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GISEL-NEXT:    v_mul_lo_u32 v1, s2, v0
; GISEL-NEXT:    v_mul_hi_u32 v1, v0, v1
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT:    v_mul_hi_u32 v0, s0, v0
; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s1
; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT:    v_subrev_i32_e64 v2, s[2:3], s1, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
; GISEL-NEXT:    ; return to shader part epilog
;
; CGP-LABEL: s_udiv_i32:
; CGP:       ; %bb.0:
; CGP-NEXT:    v_cvt_f32_u32_e32 v0, s1
; CGP-NEXT:    s_sub_i32 s2, 0, s1
; CGP-NEXT:    v_rcp_f32_e32 v0, v0
; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
; CGP-NEXT:    v_mul_lo_u32 v1, s2, v0
; CGP-NEXT:    v_mul_lo_u32 v2, 0, v1
; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; CGP-NEXT:    v_mul_lo_u32 v1, 0, v0
; CGP-NEXT:    v_mul_hi_u32 v0, s0, v0
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
; CGP-NEXT:    v_mul_lo_u32 v1, v0, s1
; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT:    v_subrev_i32_e64 v2, s[2:3], s1, v1
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT:    v_readfirstlane_b32 s0, v0
; CGP-NEXT:    ; return to shader part epilog
  %result = udiv i32 %num, %den
  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
  ret i32 %readlane
}

; <2 x i32> version of v_udiv_i32. The expected output contains the scalar
; instruction sequence twice (once per element), interleaved.
define <2 x i32> @v_udiv_v2i32(<2 x i32> %num, <2 x i32> %den) {
; GISEL-LABEL: v_udiv_v2i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v3
; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i32:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
; CGP-NEXT:    v_rcp_f32_e32 v4, v4
; CGP-NEXT:    v_rcp_f32_e32 v6, v6
; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
; CGP-NEXT:    v_mul_lo_u32 v6, 0, v4
; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
; CGP-NEXT:    v_mul_lo_u32 v7, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = udiv <2 x i32> %num, %den
  ret <2 x i32> %result
}

; Division by the constant 4096 (0x1000). Both invocations share one set of
; expectations, which still contain the reciprocal-based sequence; the
; quotient-times-denominator product shows up as a left shift by 12.
define i32 @v_udiv_i32_pow2k_denom(i32 %num) {
; CHECK-LABEL: v_udiv_i32_pow2k_denom:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_movk_i32 s6, 0x1000
; CHECK-NEXT:    v_mov_b32_e32 v1, 0xfffff000
; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
; CHECK-NEXT:    v_mul_hi_u32 v1, v2, v1
; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 12, v1
; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; CHECK-NEXT:    v_subrev_i32_e64 v2, s[4:5], s6, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v1
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %result = udiv i32 %num, 4096
  ret i32 %result
}

; <2 x i32> division by a splat of 4096. Unlike the scalar case, the expected
; output differs between the two invocations.
define <2 x i32> @v_udiv_v2i32_pow2k_denom(<2 x i32> %num) {
; GISEL-LABEL: v_udiv_v2i32_pow2k_denom:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_movk_i32 s8, 0x1000
; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, s8
; GISEL-NEXT:    s_sub_i32 s4, 0, s8
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v2
; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT:    v_mul_lo_u32 v4, s4, v3
; GISEL-NEXT:    v_mul_lo_u32 v5, s4, v2
; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
; GISEL-NEXT:    v_mul_hi_u32 v5, v2, v5
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
; GISEL-NEXT:    v_mul_hi_u32 v2, v1, v2
; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v6, 12, v2
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v2
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
; GISEL-NEXT:    v_subrev_i32_e64 v4, s[4:5], s8, v0
; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v1
; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v7, s[4:5]
; GISEL-NEXT:    v_subrev_i32_e64 v5, s[6:7], s8, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v3
; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v2, v5, vcc
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i32_pow2k_denom:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_movk_i32 s4, 0x1000
; CGP-NEXT:    v_mov_b32_e32 v2, 0x1000
; CGP-NEXT:    s_movk_i32 s5, 0xf000
; CGP-NEXT:    v_mov_b32_e32 v3, 0xfffff000
; CGP-NEXT:    v_cvt_f32_u32_e32 v4, s4
; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v5
; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
; CGP-NEXT:    v_mul_lo_u32 v6, s5, v4
; CGP-NEXT:    v_mul_lo_u32 v3, v3, v5
; CGP-NEXT:    v_mul_hi_u32 v6, v4, v6
; CGP-NEXT:    v_mul_hi_u32 v3, v5, v3
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
; CGP-NEXT:    v_lshlrev_b32_e32 v5, 12, v4
; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v3
; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
; CGP-NEXT:    v_subrev_i32_e64 v5, s[4:5], s4, v0
; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v8, s[4:5]
; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v1, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v4
; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[4:5]
; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
; CGP-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = udiv <2 x i32> %num, <i32 4096, i32 4096>
  ret <2 x i32> %result
}

; Division by the odd constant 1235195 (0x12d8fb in the expected output).
; Both invocations share one set of expectations.
define i32 @v_udiv_i32_oddk_denom(i32 %num) {
; CHECK-LABEL: v_udiv_i32_oddk_denom:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
; CHECK-NEXT:    v_mov_b32_e32 v1, 0xffed2705
; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
; CHECK-NEXT:    v_mul_hi_u32 v1, v2, v1
; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT:    v_mul_lo_u32 v2, v1, s6
; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; CHECK-NEXT:    v_subrev_i32_e64 v2, s[4:5], s6, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v1
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %result = udiv i32 %num, 1235195
  ret i32 %result
}

; <2 x i32> division by a splat of 1235195. The expected output differs
; between the two invocations.
define <2 x i32> @v_udiv_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-LABEL: v_udiv_v2i32_oddk_denom:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_mov_b32 s8, 0x12d8fb
; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, s8
; GISEL-NEXT:    s_sub_i32 s4, 0, s8
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v2
; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT:    v_mul_lo_u32 v4, s4, v3
; GISEL-NEXT:    v_mul_lo_u32 v5, s4, v2
; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
; GISEL-NEXT:    v_mul_hi_u32 v5, v2, v5
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
; GISEL-NEXT:    v_mul_hi_u32 v2, v1, v2
; GISEL-NEXT:    v_mul_lo_u32 v4, v3, s8
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
; GISEL-NEXT:    v_mul_lo_u32 v6, v2, s8
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v2
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
; GISEL-NEXT:    v_subrev_i32_e64 v4, s[4:5], s8, v0
; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v1
; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v7, s[4:5]
; GISEL-NEXT:    v_subrev_i32_e64 v5, s[6:7], s8, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v3
; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v2, v5, vcc
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i32_oddk_denom:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
; CGP-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
; CGP-NEXT:    v_cvt_f32_u32_e32 v3, s4
; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT:    v_mul_lo_u32 v5, s5, v3
; CGP-NEXT:    v_mul_lo_u32 v6, s5, v4
; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
; CGP-NEXT:    v_mul_hi_u32 v6, v4, v6
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
; CGP-NEXT:    v_mul_lo_u32 v5, v3, s4
; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v3
; CGP-NEXT:    v_mul_lo_u32 v7, v4, v2
; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
; CGP-NEXT:    v_subrev_i32_e64 v5, s[4:5], s4, v0
; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v1, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[4:5]
; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v3, v5, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v6, vcc
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = udiv <2 x i32> %num, <i32 1235195, i32 1235195>
  ret <2 x i32> %result
}

; Division where the denominator is 4096 shifted left by the variable %y.
; Both invocations share one set of expectations.
define i32 @v_udiv_i32_pow2_shl_denom(i32 %x, i32 %y) {
; CHECK-LABEL: v_udiv_i32_pow2_shl_denom:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v1
; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT:    v_mul_lo_u32 v3, v2, v1
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %shl.y = shl i32 4096, %y
  %r = udiv i32 %x, %shl.y
  ret i32 %r
}

; <2 x i32> version of the shifted-4096 denominator test. The expected output
; differs between the two invocations.
define <2 x i32> @v_udiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
; GISEL-LABEL: v_udiv_v2i32_pow2_shl_denom:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_movk_i32 s4, 0x1000
; GISEL-NEXT:    v_lshl_b32_e32 v2, s4, v2
; GISEL-NEXT:    v_lshl_b32_e32 v3, s4, v3
; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v3
; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i32_pow2_shl_denom:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_movk_i32 s4, 0x1000
; CGP-NEXT:    v_lshl_b32_e32 v2, s4, v2
; CGP-NEXT:    v_lshl_b32_e32 v3, s4, v3
; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
; CGP-NEXT:    v_rcp_f32_e32 v4, v4
; CGP-NEXT:    v_rcp_f32_e32 v6, v6
; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
; CGP-NEXT:    v_mul_lo_u32 v6, 0, v4
; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
; CGP-NEXT:    v_mul_lo_u32 v7, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
  %r = udiv <2 x i32> %x, %shl.y
  ret <2 x i32> %r
}

; Division where both operands are first masked with 16777215 (0xffffff), so
; only their low 24 bits take part. The expected output for both invocations
; is still the sequence used for unmasked operands, preceded by the two
; v_and_b32 masks.
define i32 @v_udiv_i32_24bit(i32 %num, i32 %den) {
; GISEL-LABEL: v_udiv_i32_24bit:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_i32_24bit:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_mov_b32 s4, 0xffffff
; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
; CGP-NEXT:    v_rcp_f32_e32 v2, v2
; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
; CGP-NEXT:    v_mul_lo_u32 v3, v2, v1
; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %num.mask = and i32 %num, 16777215
  %den.mask = and i32 %den, 16777215
  %result = udiv i32 %num.mask, %den.mask
  ret i32 %result
}

; <2 x i32> version of the 24-bit masked division test; every element of both
; operands is masked with 16777215 before the udiv.
define <2 x i32> @v_udiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
; GISEL-LABEL: v_udiv_v2i32_24bit:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
; GISEL-NEXT:    v_and_b32_e32 v3, s4, v3
; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v3
; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i32_24bit:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_mov_b32 s4, 0xffffff
; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
; CGP-NEXT:    v_and_b32_e32 v3, s4, v3
; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
; CGP-NEXT:    v_rcp_f32_e32 v4, v4
; CGP-NEXT:    v_rcp_f32_e32 v6, v6
; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
; CGP-NEXT:    v_mul_lo_u32 v6, 0, v4
; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
; CGP-NEXT:    v_mul_lo_u32 v7, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
  %result = udiv <2 x i32> %num.mask, %den.mask
  ret <2 x i32> %result
}