; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc  -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - -amdgpu-codegenprepare-mul24=0 < %s | FileCheck -check-prefix=GFX9 %s

; Product of four sign-extended i8 arguments, computed in i48 as
; (X * Y) * (Z * W) and truncated to i16.
; Every multiply operand fits in a signed 24-bit value: the sext'd i8 inputs
; trivially, and each pairwise product needs at most 16 bits. The checks in
; this function expect all three multiplies to be selected as v_mul_i32_i24,
; with the two inner ones reading their byte operands through SDWA sext().
define i16 @num_sign_bits_mul_i48_0(i8 %X, i8 %Y, i8 %Z, i8 %W) {
; GFX9-LABEL: num_sign_bits_mul_i48_0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mul_i32_i24_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_mul_i32_i24_sdwa v1, sext(v2), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_mul_i32_i24_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %A = sext i8 %X to i48
  %B = sext i8 %Y to i48
  %C = sext i8 %Z to i48
  %D = sext i8 %W to i48
  %mul0 = mul i48 %A, %B
  %mul1 = mul i48 %C, %D
  %mul2 = mul i48 %mul0, %mul1
  %trunc = trunc i48 %mul2 to i16
  ret i16 %trunc
}

; Same i48 multiply tree over four sign-extended i8 arguments as
; @num_sign_bits_mul_i48_0, but the product is arithmetically shifted right by
; 24 before the truncate to i16, so bits 24..39 of the product are returned.
; The checks in this function expect the outer multiply to be produced as a
; v_mul_hi_i32_i24 / v_mul_i32_i24 pair forming a 64-bit value in v[0:1],
; followed by a 64-bit v_lshrrev_b64 by 24.
define i16 @num_sign_bits_mul_i48_1(i8 %X, i8 %Y, i8 %Z, i8 %W) {
; GFX9-LABEL: num_sign_bits_mul_i48_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mul_i32_i24_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_mul_i32_i24_sdwa v2, sext(v2), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_mul_hi_i32_i24_e32 v1, v0, v2
; GFX9-NEXT:    v_mul_i32_i24_e32 v0, v0, v2
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 24, v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %A = sext i8 %X to i48
  %B = sext i8 %Y to i48
  %C = sext i8 %Z to i48
  %D = sext i8 %W to i48
  %mul0 = mul i48 %A, %B
  %mul1 = mul i48 %C, %D
  %mul2 = mul i48 %mul0, %mul1
  %ashr = ashr i48 %mul2, 24
  %trunc = trunc i48 %ashr to i16
  ret i16 %trunc
}

; i32 multiply tree (x * y) * (z * w) where each argument is first
; sign-extended from its low 25 bits via shl 7 / ashr 7. A 25-bit signed value
; is one bit too wide for a signed 24-bit multiply operand.
; The checks in this function expect a v_bfe_i32 of width 25 on every input and
; plain v_mul_lo_u32 for all three multiplies.
define i32 @num_sign_bits_mul_i32_7(i32 %x, i32 %y, i32 %z, i32 %w) {
; GFX9-LABEL: num_sign_bits_mul_i32_7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 25
; GFX9-NEXT:    v_bfe_i32 v1, v1, 0, 25
; GFX9-NEXT:    v_bfe_i32 v2, v2, 0, 25
; GFX9-NEXT:    v_bfe_i32 v3, v3, 0, 25
; GFX9-NEXT:    v_mul_lo_u32 v0, v0, v1
; GFX9-NEXT:    v_mul_lo_u32 v1, v2, v3
; GFX9-NEXT:    v_mul_lo_u32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %x.shl = shl i32 %x, 7
  %x.bits = ashr i32 %x.shl, 7

  %y.shl = shl i32 %y, 7
  %y.bits = ashr i32 %y.shl, 7

  %z.shl = shl i32 %z, 7
  %z.bits = ashr i32 %z.shl, 7

  %w.shl = shl i32 %w, 7
  %w.bits = ashr i32 %w.shl, 7

  %mul0 = mul i32 %x.bits, %y.bits
  %mul1 = mul i32 %z.bits, %w.bits
  %mul2 = mul i32 %mul0, %mul1
  ret i32 %mul2
}

; i32 multiply tree (x * y) * (z * w) where each argument is first
; sign-extended from its low 24 bits via shl 8 / ashr 8, i.e. exactly the
; width of a signed 24-bit multiply operand.
; The checks in this function expect the two inner multiplies to be
; v_mul_i32_i24 applied directly to the incoming registers (no v_bfe_i32 is
; expected), and the outer multiply to be v_mul_lo_u32.
define i32 @num_sign_bits_mul_i32_8(i32 %x, i32 %y, i32 %z, i32 %w) {
; GFX9-LABEL: num_sign_bits_mul_i32_8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mul_i32_i24_e32 v0, v0, v1
; GFX9-NEXT:    v_mul_i32_i24_e32 v1, v2, v3
; GFX9-NEXT:    v_mul_lo_u32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %x.shl = shl i32 %x, 8
  %x.bits = ashr i32 %x.shl, 8

  %y.shl = shl i32 %y, 8
  %y.bits = ashr i32 %y.shl, 8

  %z.shl = shl i32 %z, 8
  %z.bits = ashr i32 %z.shl, 8

  %w.shl = shl i32 %w, 8
  %w.bits = ashr i32 %w.shl, 8

  %mul0 = mul i32 %x.bits, %y.bits
  %mul1 = mul i32 %z.bits, %w.bits
  %mul2 = mul i32 %mul0, %mul1
  ret i32 %mul2
}

; i32 multiply tree (x * y) * (z * w) where each argument is first
; sign-extended from its low 23 bits via shl 9 / ashr 9.
; The checks in this function expect a v_bfe_i32 of width 23 on every input,
; v_mul_i32_i24 for the two inner multiplies, and v_mul_lo_u32 for the outer
; multiply.
define i32 @num_sign_bits_mul_i32_9(i32 %x, i32 %y, i32 %z, i32 %w) {
; GFX9-LABEL: num_sign_bits_mul_i32_9:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 23
; GFX9-NEXT:    v_bfe_i32 v1, v1, 0, 23
; GFX9-NEXT:    v_bfe_i32 v2, v2, 0, 23
; GFX9-NEXT:    v_bfe_i32 v3, v3, 0, 23
; GFX9-NEXT:    v_mul_i32_i24_e32 v0, v0, v1
; GFX9-NEXT:    v_mul_i32_i24_e32 v1, v2, v3
; GFX9-NEXT:    v_mul_lo_u32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %x.shl = shl i32 %x, 9
  %x.bits = ashr i32 %x.shl, 9

  %y.shl = shl i32 %y, 9
  %y.bits = ashr i32 %y.shl, 9

  %z.shl = shl i32 %z, 9
  %z.bits = ashr i32 %z.shl, 9

  %w.shl = shl i32 %w, 9
  %w.bits = ashr i32 %w.shl, 9

  %mul0 = mul i32 %x.bits, %y.bits
  %mul1 = mul i32 %z.bits, %w.bits
  %mul2 = mul i32 %mul0, %mul1
  ret i32 %mul2
}

; i32 multiply tree (x * y) * (z * w) where each argument is first
; sign-extended from its low 22 bits via shl 10 / ashr 10.
; The checks in this function expect a v_bfe_i32 of width 22 on every input,
; v_mul_i32_i24 for the two inner multiplies, and v_mul_lo_u32 for the outer
; multiply.
define i32 @num_sign_bits_mul_i32_10(i32 %x, i32 %y, i32 %z, i32 %w) {
; GFX9-LABEL: num_sign_bits_mul_i32_10:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 22
; GFX9-NEXT:    v_bfe_i32 v1, v1, 0, 22
; GFX9-NEXT:    v_bfe_i32 v2, v2, 0, 22
; GFX9-NEXT:    v_bfe_i32 v3, v3, 0, 22
; GFX9-NEXT:    v_mul_i32_i24_e32 v0, v0, v1
; GFX9-NEXT:    v_mul_i32_i24_e32 v1, v2, v3
; GFX9-NEXT:    v_mul_lo_u32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %x.shl = shl i32 %x, 10
  %x.bits = ashr i32 %x.shl, 10

  %y.shl = shl i32 %y, 10
  %y.bits = ashr i32 %y.shl, 10

  %z.shl = shl i32 %z, 10
  %z.bits = ashr i32 %z.shl, 10

  %w.shl = shl i32 %w, 10
  %w.bits = ashr i32 %w.shl, 10

  %mul0 = mul i32 %x.bits, %y.bits
  %mul1 = mul i32 %z.bits, %w.bits
  %mul2 = mul i32 %mul0, %mul1
  ret i32 %mul2
}

; Calls the llvm.amdgcn.mul.i24 intrinsic with the constant operands 0 and -7
; and shifts the result left by 2.
; The checks in this function expect no multiply or shift instruction at all:
; the returned register is simply loaded with the constant 0.
define i32 @known_bits_mul24() {
; GFX9-LABEL: known_bits_mul24:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %r0 = call i32 @llvm.amdgcn.mul.i24(i32 0, i32 -7)
  %r1 = shl i32 %r0, 2
  ret i32 %r1
}

; Intrinsic called by @known_bits_mul24: takes two i32 operands, returns i32.
declare i32 @llvm.amdgcn.mul.i24(i32, i32)