; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s

; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
;
; The first llc invocation passes the disable-idiv-expansion flag as 1 and is
; checked with the GISEL prefix; the second passes 0 and is checked with the
; CGP prefix. Functions whose expected output is identical under both
; invocations use the shared prefix only.

; Plain i32 urem where numerator and denominator are both function arguments.
define i32 @v_urem_i32(i32 %num, i32 %den) {
; GISEL-LABEL: v_urem_i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_i32:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; CGP-NEXT: v_rcp_f32_e32 v2, v2
; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
; CGP-NEXT: v_mul_lo_u32 v4, 0, v3
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_mul_lo_u32 v3, 0, v2
; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CGP-NEXT: v_mul_lo_u32 v2, v2, v1
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
  %result = urem i32 %num, %den
  ret i32 %result
}

; FIXME: This is a workaround for not handling uniform VGPR case.
declare i32 @llvm.amdgcn.readfirstlane(i32)

; i32 urem in an amdgpu_ps function with both operands marked inreg; the
; result is passed through readfirstlane before being returned (see the FIXME
; on the declaration above).
define amdgpu_ps i32 @s_urem_i32(i32 inreg %num, i32 inreg %den) {
; GISEL-LABEL: s_urem_i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1
; GISEL-NEXT: s_sub_i32 s2, 0, s1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
; GISEL-NEXT: v_mul_lo_u32 v1, s2, v0
; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0
; GISEL-NEXT: v_mul_lo_u32 v0, v0, s1
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-NEXT: ; return to shader part epilog
;
; CGP-LABEL: s_urem_i32:
; CGP: ; %bb.0:
; CGP-NEXT: v_cvt_f32_u32_e32 v0, s1
; CGP-NEXT: s_sub_i32 s2, 0, s1
; CGP-NEXT: v_rcp_f32_e32 v0, v0
; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
; CGP-NEXT: v_mul_lo_u32 v1, s2, v0
; CGP-NEXT: v_mul_lo_u32 v2, 0, v1
; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CGP-NEXT: v_mul_lo_u32 v1, 0, v0
; CGP-NEXT: v_mul_hi_u32 v0, s0, v0
; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; CGP-NEXT: v_mul_lo_u32 v0, v0, s1
; CGP-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
; CGP-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CGP-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CGP-NEXT: v_readfirstlane_b32 s0, v0
; CGP-NEXT: ; return to shader part epilog
  %result = urem i32 %num, %den
  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
  ret i32 %readlane
}

; <2 x i32> urem with both operands as function arguments.
define <2 x i32> @v_urem_v2i32(<2 x i32> %num, <2 x i32> %den) {
; GISEL-LABEL: v_urem_v2i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
; GISEL-NEXT: s_mov_b32 s4, 0x4f7ffffe
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT: v_mul_f32_e32 v4, s4, v4
; GISEL-NEXT: v_mul_f32_e32 v6, s4, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i32:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; CGP-NEXT: v_rcp_f32_e32 v4, v4
; CGP-NEXT: v_rcp_f32_e32 v6, v6
; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
; CGP-NEXT: v_mul_lo_u32 v8, 0, v5
; CGP-NEXT: v_mul_hi_u32 v5, v4, v5
; CGP-NEXT: v_mul_lo_u32 v9, 0, v7
; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7
; CGP-NEXT: v_mul_lo_u32 v6, 0, v4
; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
; CGP-NEXT: v_mul_lo_u32 v7, 0, v5
; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
; CGP-NEXT: v_mul_lo_u32 v4, v4, v2
; CGP-NEXT: v_mul_lo_u32 v5, v5, v3
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
  %result = urem <2 x i32> %num, %den
  ret <2 x i32> %result
}

; i32 urem by the constant 4096 (a power of two). The expected output below
; still uses the reciprocal-based sequence, with the multiply by the divisor
; appearing as a left shift by 12.
define i32 @v_urem_i32_pow2k_denom(i32 %num) {
; CHECK-LABEL: v_urem_i32_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_movk_i32 s4, 0x1000
; CHECK-NEXT: v_mov_b32_e32 v1, 0xfffff000
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 12, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = urem i32 %num, 4096
  ret i32 %result
}

; <2 x i32> urem by a splat of the constant 4096.
define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) {
; GISEL-LABEL: v_urem_v2i32_pow2k_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_movk_i32 s4, 0x1000
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, s4
; GISEL-NEXT: s_sub_i32 s5, 0, s4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT: v_mul_lo_u32 v3, s5, v2
; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_mul_hi_u32 v3, v0, v2
; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2
; GISEL-NEXT: v_lshlrev_b32_e32 v3, 12, v3
; GISEL-NEXT: v_lshlrev_b32_e32 v2, 12, v2
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i32_pow2k_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_movk_i32 s4, 0x1000
; CGP-NEXT: v_mov_b32_e32 v2, 0x1000
; CGP-NEXT: s_mov_b32 s5, 0x4f7ffffe
; CGP-NEXT: s_movk_i32 s6, 0xf000
; CGP-NEXT: v_cvt_f32_u32_e32 v3, s4
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
; CGP-NEXT: v_mul_f32_e32 v3, s5, v3
; CGP-NEXT: v_mul_f32_e32 v4, s5, v4
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT: v_mul_lo_u32 v5, s6, v3
; CGP-NEXT: v_mul_lo_u32 v6, s6, v4
; CGP-NEXT: v_mul_hi_u32 v5, v3, v5
; CGP-NEXT: v_mul_hi_u32 v6, v4, v6
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
; CGP-NEXT: v_lshlrev_b32_e32 v3, 12, v3
; CGP-NEXT: v_lshlrev_b32_e32 v4, 12, v4
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
  %result = urem <2 x i32> %num, <i32 4096, i32 4096>
  ret <2 x i32> %result
}

; i32 urem by the odd constant 1235195 (0x12d8fb).
define i32 @v_urem_i32_oddk_denom(i32 %num) {
; CHECK-LABEL: v_urem_i32_oddk_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
; CHECK-NEXT: v_mov_b32_e32 v1, 0xffed2705
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v1, v1, s4
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = urem i32 %num, 1235195
  ret i32 %result
}

; <2 x i32> urem by a splat of the odd constant 1235195.
define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-LABEL: v_urem_v2i32_oddk_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0x12d8fb
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, s4
; GISEL-NEXT: s_sub_i32 s5, 0, s4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT: v_mul_lo_u32 v3, s5, v2
; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_mul_hi_u32 v3, v0, v2
; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v3, s4
; GISEL-NEXT: v_mul_lo_u32 v2, v2, s4
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i32_oddk_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
; CGP-NEXT: v_mov_b32_e32 v2, 0x12d8fb
; CGP-NEXT: s_mov_b32 s5, 0xffed2705
; CGP-NEXT: v_cvt_f32_u32_e32 v3, s4
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_mul_lo_u32 v4, s5, v3
; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
; CGP-NEXT: v_mul_lo_u32 v4, v4, s4
; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
  %result = urem <2 x i32> %num, <i32 1235195, i32 1235195>
  ret <2 x i32> %result
}

; i32 urem whose denominator is the constant 4096 shifted left by the
; variable %y.
define i32 @v_urem_i32_pow2_shl_denom(i32 %x, i32 %y) {
; CHECK-LABEL: v_urem_i32_pow2_shl_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v1
; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2
; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %shl.y = shl i32 4096, %y
  %r = urem i32 %x, %shl.y
  ret i32 %r
}

; <2 x i32> urem whose denominator is a splat of 4096 shifted left per lane
; by %y.
define <2 x i32> @v_urem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
; GISEL-LABEL: v_urem_v2i32_pow2_shl_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_movk_i32 s4, 0x1000
; GISEL-NEXT: s_mov_b32 s5, 0x4f7ffffe
; GISEL-NEXT: v_lshl_b32_e32 v2, s4, v2
; GISEL-NEXT: v_lshl_b32_e32 v3, s4, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT: v_mul_f32_e32 v4, s5, v4
; GISEL-NEXT: v_mul_f32_e32 v6, s5, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i32_pow2_shl_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_movk_i32 s4, 0x1000
; CGP-NEXT: v_lshl_b32_e32 v2, s4, v2
; CGP-NEXT: v_lshl_b32_e32 v3, s4, v3
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; CGP-NEXT: v_rcp_f32_e32 v4, v4
; CGP-NEXT: v_rcp_f32_e32 v6, v6
; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
; CGP-NEXT: v_mul_lo_u32 v8, 0, v5
; CGP-NEXT: v_mul_hi_u32 v5, v4, v5
; CGP-NEXT: v_mul_lo_u32 v9, 0, v7
; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7
; CGP-NEXT: v_mul_lo_u32 v6, 0, v4
; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
; CGP-NEXT: v_mul_lo_u32 v7, 0, v5
; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
; CGP-NEXT: v_mul_lo_u32 v4, v4, v2
; CGP-NEXT: v_mul_lo_u32 v5, v5, v3
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
  %r = urem <2 x i32> %x, %shl.y
  ret <2 x i32> %r
}

; i32 urem where both operands are first masked with 16777215 (0xffffff), so
; only their low 24 bits can be set.
define i32 @v_urem_i32_24bit(i32 %num, i32 %den) {
; GISEL-LABEL: v_urem_i32_24bit:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0xffffff
; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
; GISEL-NEXT: v_and_b32_e32 v1, s4, v1
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_i32_24bit:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b32 s4, 0xffffff
; CGP-NEXT: v_and_b32_e32 v0, s4, v0
; CGP-NEXT: v_and_b32_e32 v1, s4, v1
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; CGP-NEXT: v_rcp_f32_e32 v2, v2
; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
; CGP-NEXT: v_mul_lo_u32 v4, 0, v3
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_mul_lo_u32 v3, 0, v2
; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CGP-NEXT: v_mul_lo_u32 v2, v2, v1
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
  %num.mask = and i32 %num, 16777215
  %den.mask = and i32 %den, 16777215
  %result = urem i32 %num.mask, %den.mask
  ret i32 %result
}

; <2 x i32> urem where every lane of both operands is first masked with
; 16777215 (0xffffff).
define <2 x i32> @v_urem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
; GISEL-LABEL: v_urem_v2i32_24bit:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0xffffff
; GISEL-NEXT: s_mov_b32 s5, 0x4f7ffffe
; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
; GISEL-NEXT: v_and_b32_e32 v1, s4, v1
; GISEL-NEXT: v_and_b32_e32 v2, s4, v2
; GISEL-NEXT: v_and_b32_e32 v3, s4, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT: v_mul_f32_e32 v4, s5, v4
; GISEL-NEXT: v_mul_f32_e32 v6, s5, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i32_24bit:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b32 s4, 0xffffff
; CGP-NEXT: v_and_b32_e32 v0, s4, v0
; CGP-NEXT: v_and_b32_e32 v1, s4, v1
; CGP-NEXT: v_and_b32_e32 v2, s4, v2
; CGP-NEXT: v_and_b32_e32 v3, s4, v3
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; CGP-NEXT: v_rcp_f32_e32 v4, v4
; CGP-NEXT: v_rcp_f32_e32 v6, v6
; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
; CGP-NEXT: v_mul_lo_u32 v8, 0, v5
; CGP-NEXT: v_mul_hi_u32 v5, v4, v5
; CGP-NEXT: v_mul_lo_u32 v9, 0, v7
; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7
; CGP-NEXT: v_mul_lo_u32 v6, 0, v4
; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
; CGP-NEXT: v_mul_lo_u32 v7, 0, v5
; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
; CGP-NEXT: v_mul_lo_u32 v4, v4, v2
; CGP-NEXT: v_mul_lo_u32 v5, v5, v3
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
  %result = urem <2 x i32> %num.mask, %den.mask
  ret <2 x i32> %result
}