; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - -amdgpu-codegenprepare-mul24=0 < %s | FileCheck -check-prefix=GFX9 %s

; Each function below pairs LLVM IR with the gfx900 assembly expected from llc.
; The inputs vary how many significant bits the multiply operands carry, and
; the expected output records which multiply instruction is selected.
; NOTE(review): -amdgpu-codegenprepare-mul24=0 presumably disables the IR-level
; mul24 formation so that selection happens in the backend; confirm against the
; option's definition.

; Four i8 arguments are sign-extended to i48 and multiplied pairwise; the two
; products are then multiplied and only the low 16 bits are returned.
; Expected: all three multiplies are v_mul_i32_i24, the first two in SDWA form
; reading sign-extended byte 0 of each argument register.
define i16 @num_sign_bits_mul_i48_0(i8 %X, i8 %Y, i8 %Z, i8 %W) {
; GFX9-LABEL: num_sign_bits_mul_i48_0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mul_i32_i24_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_mul_i32_i24_sdwa v1, sext(v2), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_mul_i32_i24_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %A = sext i8 %X to i48
  %B = sext i8 %Y to i48
  %C = sext i8 %Z to i48
  %D = sext i8 %W to i48
  %mul0 = mul i48 %A, %B
  %mul1 = mul i48 %C, %D
  %mul2 = mul i48 %mul0, %mul1
  %trunc = trunc i48 %mul2 to i16
  ret i16 %trunc
}

; Same inputs as num_sign_bits_mul_i48_0, but the final product is shifted
; right by 24 before truncation, so bits above the low 32 are required.
; Expected: the final product is formed from v_mul_hi_i32_i24 and
; v_mul_i32_i24, followed by a 64-bit right shift by 24.
define i16 @num_sign_bits_mul_i48_1(i8 %X, i8 %Y, i8 %Z, i8 %W) {
; GFX9-LABEL: num_sign_bits_mul_i48_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mul_i32_i24_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_mul_i32_i24_sdwa v2, sext(v2), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_mul_hi_i32_i24_e32 v1, v0, v2
; GFX9-NEXT:    v_mul_i32_i24_e32 v0, v0, v2
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 24, v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %A = sext i8 %X to i48
  %B = sext i8 %Y to i48
  %C = sext i8 %Z to i48
  %D = sext i8 %W to i48
  %mul0 = mul i48 %A, %B
  %mul1 = mul i48 %C, %D
  %mul2 = mul i48 %mul0, %mul1
  %ashr = ashr i48 %mul2, 24
  %trunc = trunc i48 %ashr to i16
  ret i16 %trunc
}

; shl 7 followed by ashr 7 sign-extends each argument from its low 25 bits,
; one bit more than a 24-bit multiply operand can hold.
; Expected: a 25-bit v_bfe_i32 on every operand and all three multiplies as
; v_mul_lo_u32; no 24-bit multiply is used.
define i32 @num_sign_bits_mul_i32_7(i32 %x, i32 %y, i32 %z, i32 %w) {
; GFX9-LABEL: num_sign_bits_mul_i32_7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 25
; GFX9-NEXT:    v_bfe_i32 v1, v1, 0, 25
; GFX9-NEXT:    v_bfe_i32 v2, v2, 0, 25
; GFX9-NEXT:    v_bfe_i32 v3, v3, 0, 25
; GFX9-NEXT:    v_mul_lo_u32 v0, v0, v1
; GFX9-NEXT:    v_mul_lo_u32 v1, v2, v3
; GFX9-NEXT:    v_mul_lo_u32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %x.shl = shl i32 %x, 7
  %x.bits = ashr i32 %x.shl, 7

  %y.shl = shl i32 %y, 7
  %y.bits = ashr i32 %y.shl, 7

  %z.shl = shl i32 %z, 7
  %z.bits = ashr i32 %z.shl, 7

  %w.shl = shl i32 %w, 7
  %w.bits = ashr i32 %w.shl, 7

  %mul0 = mul i32 %x.bits, %y.bits
  %mul1 = mul i32 %z.bits, %w.bits
  %mul2 = mul i32 %mul0, %mul1
  ret i32 %mul2
}

; shl 8 followed by ashr 8 sign-extends each argument from its low 24 bits.
; Expected: no bit-field extracts; the two inner multiplies are v_mul_i32_i24
; applied directly to the argument registers, and the outer multiply of the
; two products is v_mul_lo_u32.
define i32 @num_sign_bits_mul_i32_8(i32 %x, i32 %y, i32 %z, i32 %w) {
; GFX9-LABEL: num_sign_bits_mul_i32_8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mul_i32_i24_e32 v0, v0, v1
; GFX9-NEXT:    v_mul_i32_i24_e32 v1, v2, v3
; GFX9-NEXT:    v_mul_lo_u32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %x.shl = shl i32 %x, 8
  %x.bits = ashr i32 %x.shl, 8

  %y.shl = shl i32 %y, 8
  %y.bits = ashr i32 %y.shl, 8

  %z.shl = shl i32 %z, 8
  %z.bits = ashr i32 %z.shl, 8

  %w.shl = shl i32 %w, 8
  %w.bits = ashr i32 %w.shl, 8

  %mul0 = mul i32 %x.bits, %y.bits
  %mul1 = mul i32 %z.bits, %w.bits
  %mul2 = mul i32 %mul0, %mul1
  ret i32 %mul2
}

; shl 9 followed by ashr 9 sign-extends each argument from its low 23 bits.
; Expected: a 23-bit v_bfe_i32 on every operand, the two inner multiplies as
; v_mul_i32_i24, and the outer multiply as v_mul_lo_u32.
define i32 @num_sign_bits_mul_i32_9(i32 %x, i32 %y, i32 %z, i32 %w) {
; GFX9-LABEL: num_sign_bits_mul_i32_9:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 23
; GFX9-NEXT:    v_bfe_i32 v1, v1, 0, 23
; GFX9-NEXT:    v_bfe_i32 v2, v2, 0, 23
; GFX9-NEXT:    v_bfe_i32 v3, v3, 0, 23
; GFX9-NEXT:    v_mul_i32_i24_e32 v0, v0, v1
; GFX9-NEXT:    v_mul_i32_i24_e32 v1, v2, v3
; GFX9-NEXT:    v_mul_lo_u32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %x.shl = shl i32 %x, 9
  %x.bits = ashr i32 %x.shl, 9

  %y.shl = shl i32 %y, 9
  %y.bits = ashr i32 %y.shl, 9

  %z.shl = shl i32 %z, 9
  %z.bits = ashr i32 %z.shl, 9

  %w.shl = shl i32 %w, 9
  %w.bits = ashr i32 %w.shl, 9

  %mul0 = mul i32 %x.bits, %y.bits
  %mul1 = mul i32 %z.bits, %w.bits
  %mul2 = mul i32 %mul0, %mul1
  ret i32 %mul2
}

; shl 10 followed by ashr 10 sign-extends each argument from its low 22 bits.
; Expected: a 22-bit v_bfe_i32 on every operand, the two inner multiplies as
; v_mul_i32_i24, and the outer multiply as v_mul_lo_u32.
define i32 @num_sign_bits_mul_i32_10(i32 %x, i32 %y, i32 %z, i32 %w) {
; GFX9-LABEL: num_sign_bits_mul_i32_10:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 22
; GFX9-NEXT:    v_bfe_i32 v1, v1, 0, 22
; GFX9-NEXT:    v_bfe_i32 v2, v2, 0, 22
; GFX9-NEXT:    v_bfe_i32 v3, v3, 0, 22
; GFX9-NEXT:    v_mul_i32_i24_e32 v0, v0, v1
; GFX9-NEXT:    v_mul_i32_i24_e32 v1, v2, v3
; GFX9-NEXT:    v_mul_lo_u32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %x.shl = shl i32 %x, 10
  %x.bits = ashr i32 %x.shl, 10

  %y.shl = shl i32 %y, 10
  %y.bits = ashr i32 %y.shl, 10

  %z.shl = shl i32 %z, 10
  %z.bits = ashr i32 %z.shl, 10

  %w.shl = shl i32 %w, 10
  %w.bits = ashr i32 %w.shl, 10

  %mul0 = mul i32 %x.bits, %y.bits
  %mul1 = mul i32 %z.bits, %w.bits
  %mul2 = mul i32 %mul0, %mul1
  ret i32 %mul2
}

; Calls the llvm.amdgcn.mul.i24 intrinsic on the constants 0 and -7, then
; shifts the result left by 2.
; Expected: the whole expression is folded to the constant 0 (a single
; v_mov_b32 of 0); no multiply or shift instruction is emitted.
define i32 @known_bits_mul24() {
; GFX9-LABEL: known_bits_mul24:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %r0 = call i32 @llvm.amdgcn.mul.i24(i32 0, i32 -7)
  %r1 = shl i32 %r0, 2
  ret i32 %r1
}

; Intrinsic used by known_bits_mul24.
declare i32 @llvm.amdgcn.mul.i24(i32, i32)