; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s

; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
;
; llc is invoked twice with -global-isel. The invocation that passes
; -amdgpu-codegenprepare-disable-idiv-expansion=1 is verified under the GISEL
; prefix, the one that passes =0 under the CGP prefix. Functions whose output
; is identical for both invocations are verified under the shared prefix.

; Signed remainder of two i32 function arguments. In the assertions below the
; two invocations differ in the reciprocal instruction (v_rcp_iflag_f32 vs.
; v_rcp_f32) and in the additional v_mul_lo_u32-by-0 / v_add_i32 pairs that
; only appear in the CGP output.
define i32 @v_srem_i32(i32 %num, i32 %den) {
; GISEL-LABEL: v_srem_i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v3
; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v1
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_i32:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v3
; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v1
; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
; CGP-NEXT:    v_rcp_f32_e32 v3, v3
; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
; CGP-NEXT:    v_mul_lo_u32 v5, 0, v4
; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
; CGP-NEXT:    v_mul_lo_u32 v3, v3, v1
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = srem i32 %num, %den
  ret i32 %result
}

; FIXME: This is a workaround for not handling the uniform VGPR case.
declare i32 @llvm.amdgcn.readfirstlane(i32)

; Signed remainder of two inreg i32 arguments in an amdgpu_ps function. The
; srem result is passed through llvm.amdgcn.readfirstlane before it is
; returned, which shows up as the final v_readfirstlane_b32 in the assertions.
define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
; GISEL-LABEL: s_srem_i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_ashr_i32 s2, s0, 31
; GISEL-NEXT:    s_ashr_i32 s3, s1, 31
; GISEL-NEXT:    s_add_i32 s0, s0, s2
; GISEL-NEXT:    s_add_i32 s1, s1, s3
; GISEL-NEXT:    s_xor_b32 s0, s0, s2
; GISEL-NEXT:    s_xor_b32 s1, s1, s3
; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s1
; GISEL-NEXT:    s_sub_i32 s3, 0, s1
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GISEL-NEXT:    v_mul_lo_u32 v1, s3, v0
; GISEL-NEXT:    v_mul_hi_u32 v1, v0, v1
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT:    v_mul_hi_u32 v0, s0, v0
; GISEL-NEXT:    v_mul_lo_u32 v0, v0, s1
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s2, v0
; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
; GISEL-NEXT:    ; return to shader part epilog
;
; CGP-LABEL: s_srem_i32:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_ashr_i32 s2, s0, 31
; CGP-NEXT:    s_ashr_i32 s3, s1, 31
; CGP-NEXT:    s_add_i32 s0, s0, s2
; CGP-NEXT:    s_add_i32 s1, s1, s3
; CGP-NEXT:    s_xor_b32 s0, s0, s2
; CGP-NEXT:    s_xor_b32 s1, s1, s3
; CGP-NEXT:    v_cvt_f32_u32_e32 v0, s1
; CGP-NEXT:    s_sub_i32 s3, 0, s1
; CGP-NEXT:    v_rcp_f32_e32 v0, v0
; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
; CGP-NEXT:    v_mul_lo_u32 v1, s3, v0
; CGP-NEXT:    v_mul_lo_u32 v2, 0, v1
; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; CGP-NEXT:    v_mul_lo_u32 v1, 0, v0
; CGP-NEXT:    v_mul_hi_u32 v0, s0, v0
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
; CGP-NEXT:    v_mul_lo_u32 v0, v0, s1
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, s2, v0
; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s2, v0
; CGP-NEXT:    v_readfirstlane_b32 s0, v0
; CGP-NEXT:    ; return to shader part epilog
  %result = srem i32 %num, %den
  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
  ret i32 %readlane
}

; Signed remainder of two <2 x i32> function arguments; the assertions show
; the scalar sequence emitted once per element, interleaved.
define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
; GISEL-LABEL: v_srem_v2i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v5
; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v3
; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
; CGP-NEXT:    v_rcp_f32_e32 v5, v5
; CGP-NEXT:    v_rcp_f32_e32 v8, v8
; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
; CGP-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
; CGP-NEXT:    v_mul_lo_u32 v7, v7, v5
; CGP-NEXT:    v_mul_lo_u32 v9, v9, v8
; CGP-NEXT:    v_mul_lo_u32 v10, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v5, v7
; CGP-NEXT:    v_mul_lo_u32 v11, 0, v9
; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v0, v5
; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT:    v_mul_lo_u32 v5, v5, v2
; CGP-NEXT:    v_mul_lo_u32 v7, v7, v3
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = srem <2 x i32> %num, %den
  ret <2 x i32> %result
}

; Signed remainder by the constant 4096 (0x1000). Both invocations produce the
; same output here; the multiply by the divisor appears as a left shift by 12.
define i32 @v_srem_i32_pow2k_denom(i32 %num) {
; CHECK-LABEL: v_srem_i32_pow2k_denom:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_movk_i32 s4, 0x1000
; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, 0x45800000
; CHECK-NEXT:    v_mov_b32_e32 v3, 0xfffff000
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 12, v2
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %result = srem i32 %num, 4096
  ret i32 %result
}

; Signed remainder of a <2 x i32> argument by the constant splat <4096, 4096>.
define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
; GISEL-LABEL: v_srem_v2i32_pow2k_denom:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT:    s_add_i32 s4, 0x1000, 0
; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s4
; GISEL-NEXT:    s_sub_i32 s5, 0, s4
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v4
; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT:    v_mul_lo_u32 v6, s5, v5
; GISEL-NEXT:    v_mul_lo_u32 v7, s5, v4
; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v6
; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, s4
; GISEL-NEXT:    v_mul_lo_u32 v4, v4, s4
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32_pow2k_denom:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_movk_i32 s4, 0x1000
; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x45800000
; CGP-NEXT:    s_movk_i32 s5, 0xf000
; CGP-NEXT:    v_mov_b32_e32 v4, 0xfffff000
; CGP-NEXT:    v_mov_b32_e32 v5, 0x1000
; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, 0x45800000
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
; CGP-NEXT:    v_mul_lo_u32 v8, s5, v3
; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
; CGP-NEXT:    v_lshlrev_b32_e32 v3, 12, v3
; CGP-NEXT:    v_lshlrev_b32_e32 v4, 12, v4
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = srem <2 x i32> %num, <i32 4096, i32 4096>
  ret <2 x i32> %result
}

; Signed remainder by the odd constant 1235195 (0x12d8fb). Both invocations
; produce the same output here.
define i32 @v_srem_i32_oddk_denom(i32 %num) {
; CHECK-LABEL: v_srem_i32_oddk_denom:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s4, 0x12d8fb
; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, 0x4996c7d8
; CHECK-NEXT:    v_mov_b32_e32 v3, 0xffed2705
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT:    v_mul_lo_u32 v2, v2, s4
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %result = srem i32 %num, 1235195
  ret i32 %result
}

; Signed remainder of a <2 x i32> argument by the constant splat
; <1235195, 1235195>.
define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-LABEL: v_srem_v2i32_oddk_denom:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT:    s_add_i32 s4, 0x12d8fb, 0
; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s4
; GISEL-NEXT:    s_sub_i32 s5, 0, s4
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v4
; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT:    v_mul_lo_u32 v6, s5, v5
; GISEL-NEXT:    v_mul_lo_u32 v7, s5, v4
; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v6
; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, s4
; GISEL-NEXT:    v_mul_lo_u32 v4, v4, s4
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32_oddk_denom:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x4996c7d8
; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
; CGP-NEXT:    v_mov_b32_e32 v4, 0xffed2705
; CGP-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, 0x4996c7d8
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
; CGP-NEXT:    v_mul_lo_u32 v8, s5, v3
; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
; CGP-NEXT:    v_mul_lo_u32 v3, v3, s4
; CGP-NEXT:    v_mul_lo_u32 v4, v4, s4
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
  ret <2 x i32> %result
}

; Signed remainder where the divisor is 4096 shifted left by the variable %y.
; Both invocations produce the same output here.
define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) {
; CHECK-LABEL: v_srem_i32_pow2_shl_denom:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v1
; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
; CHECK-NEXT:    v_mul_lo_u32 v4, v4, v3
; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
; CHECK-NEXT:    v_mul_hi_u32 v3, v0, v3
; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v1
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %shl.y = shl i32 4096, %y
  %r = srem i32 %x, %shl.y
  ret i32 %r
}

; <2 x i32> variant: the divisor is the splat <4096, 4096> shifted left by the
; variable vector %y.
define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
; GISEL-LABEL: v_srem_v2i32_pow2_shl_denom:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_movk_i32 s4, 0x1000
; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
; GISEL-NEXT:    v_lshl_b32_e32 v2, s4, v2
; GISEL-NEXT:    v_lshl_b32_e32 v3, s4, v3
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
; GISEL-NEXT:    v_mul_hi_u32 v6, v0, v6
; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v2
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32_pow2_shl_denom:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_movk_i32 s4, 0x1000
; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT:    v_lshl_b32_e32 v2, s4, v2
; CGP-NEXT:    v_lshl_b32_e32 v3, s4, v3
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
; CGP-NEXT:    v_xor_b32_e32 v2, v2, v6
; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v2
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v3
; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
; CGP-NEXT:    v_rcp_f32_e32 v6, v6
; CGP-NEXT:    v_rcp_f32_e32 v8, v8
; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
; CGP-NEXT:    v_mul_lo_u32 v9, v9, v8
; CGP-NEXT:    v_mul_lo_u32 v10, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
; CGP-NEXT:    v_mul_lo_u32 v11, 0, v9
; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
; CGP-NEXT:    v_mul_lo_u32 v8, 0, v6
; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT:    v_mul_lo_u32 v6, v6, v2
; CGP-NEXT:    v_mul_lo_u32 v7, v7, v3
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
  %r = srem <2 x i32> %x, %shl.y
  ret <2 x i32> %r
}

; Signed remainder after both operands are masked with 16777215 (0xffffff),
; i.e. only their low 24 bits are kept. In the assertions below the CGP output
; has no sign-handling ashr/xor sequence, while the GISEL output keeps it.
define i32 @v_srem_i32_24bit(i32 %num, i32 %den) {
; GISEL-LABEL: v_srem_i32_24bit:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v3
; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v1
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_i32_24bit:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_mov_b32 s4, 0xffffff
; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
; CGP-NEXT:    v_rcp_f32_e32 v2, v2
; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
; CGP-NEXT:    v_mul_lo_u32 v2, v2, v1
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %num.mask = and i32 %num, 16777215
  %den.mask = and i32 %den, 16777215
  %result = srem i32 %num.mask, %den.mask
  ret i32 %result
}

; <2 x i32> variant of the 24-bit case: both vector operands are masked with
; the splat <16777215, 16777215> before the srem.
define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
; GISEL-LABEL: v_srem_v2i32_24bit:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
; GISEL-NEXT:    v_and_b32_e32 v3, s4, v3
; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v5
; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32_24bit:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_mov_b32 s4, 0xffffff
; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
; CGP-NEXT:    v_and_b32_e32 v3, s4, v3
; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
; CGP-NEXT:    v_rcp_f32_e32 v4, v4
; CGP-NEXT:    v_rcp_f32_e32 v6, v6
; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
; CGP-NEXT:    v_mul_lo_u32 v6, 0, v4
; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
; CGP-NEXT:    v_mul_lo_u32 v7, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
; CGP-NEXT:    v_mul_lo_u32 v4, v4, v2
; CGP-NEXT:    v_mul_lo_u32 v5, v5, v3
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
  %result = srem <2 x i32> %num.mask, %den.mask
  ret <2 x i32> %result
}