1; RUN: llc -march=mips -mcpu=mips32r2 -mattr=dsp < %s | FileCheck %s -check-prefix=R1
2; RUN: llc -march=mips -mcpu=mips32r2 -mattr=dspr2 < %s | FileCheck %s -check-prefix=R2
3
4; R1-LABEL: test_lbux:
5; R1: lbux ${{[0-9]+}}
6
; Input for the lbux check: loads the i8 at %b indexed by the variable i32 %i
; and returns it with the zeroext return attribute.
7define zeroext i8 @test_lbux(i8* nocapture %b, i32 %i) {
8entry:
9  %add.ptr = getelementptr inbounds i8, i8* %b, i32 %i
10  %0 = load i8, i8* %add.ptr, align 1
11  ret i8 %0
12}
13
14; R1-LABEL: test_lhx:
15; R1: lhx ${{[0-9]+}}
16
; Input for the lhx check: loads the i16 at element index %i of %b (align 2)
; and returns it with the signext return attribute.
17define signext i16 @test_lhx(i16* nocapture %b, i32 %i) {
18entry:
19  %add.ptr = getelementptr inbounds i16, i16* %b, i32 %i
20  %0 = load i16, i16* %add.ptr, align 2
21  ret i16 %0
22}
23
24; R1-LABEL: test_lwx:
25; R1: lwx ${{[0-9]+}}
26
; Input for the lwx check: loads the i32 at element index %i of %b (align 4)
; and returns it.
27define i32 @test_lwx(i32* nocapture %b, i32 %i) {
28entry:
29  %add.ptr = getelementptr inbounds i32, i32* %b, i32 %i
30  %0 = load i32, i32* %add.ptr, align 4
31  ret i32 %0
32}
33
34; R1-LABEL: test_add_v2q15_:
35; R1: addq.ph ${{[0-9]+}}
36
; Input for the addq.ph check: reinterprets both i32 arguments as <2 x i16>,
; adds them element-wise, and returns the sum reinterpreted as i32 inside a
; single-field { i32 } aggregate.
37define { i32 } @test_add_v2q15_(i32 %a.coerce, i32 %b.coerce) {
38entry:
39  %0 = bitcast i32 %a.coerce to <2 x i16>
40  %1 = bitcast i32 %b.coerce to <2 x i16>
41  %add = add <2 x i16> %0, %1
42  %2 = bitcast <2 x i16> %add to i32
  ; Wrap the i32 result in the { i32 } return type.
43  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
44  ret { i32 } %.fca.0.insert
45}
46
47; R1-LABEL: test_sub_v2q15_:
48; R1: subq.ph ${{[0-9]+}}
49
; Input for the subq.ph check: reinterprets both i32 arguments as <2 x i16>,
; subtracts %b.coerce's lanes from %a.coerce's, and returns the difference
; reinterpreted as i32 inside a single-field { i32 } aggregate.
50define { i32 } @test_sub_v2q15_(i32 %a.coerce, i32 %b.coerce) {
51entry:
52  %0 = bitcast i32 %a.coerce to <2 x i16>
53  %1 = bitcast i32 %b.coerce to <2 x i16>
54  %sub = sub <2 x i16> %0, %1
55  %2 = bitcast <2 x i16> %sub to i32
  ; Wrap the i32 result in the { i32 } return type.
56  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
57  ret { i32 } %.fca.0.insert
58}
59
60; R2-LABEL: test_mul_v2q15_:
61; R2: mul.ph ${{[0-9]+}}
62
63; mul.ph is an R2 instruction. Check that the multiply node gets expanded when only -mattr=dsp is enabled.
64; R1-LABEL: test_mul_v2q15_:
65; R1: mul ${{[0-9]+}}
66; R1: mul ${{[0-9]+}}
67
; Element-wise <2 x i16> multiply of the two i32 arguments, returned as i32 in
; a { i32 } aggregate. The checks above expect a single mul.ph in the
; -mattr=dspr2 run and two scalar mul instructions in the -mattr=dsp run.
68define { i32 } @test_mul_v2q15_(i32 %a.coerce, i32 %b.coerce) {
69entry:
70  %0 = bitcast i32 %a.coerce to <2 x i16>
71  %1 = bitcast i32 %b.coerce to <2 x i16>
72  %mul = mul <2 x i16> %0, %1
73  %2 = bitcast <2 x i16> %mul to i32
  ; Wrap the i32 result in the { i32 } return type.
74  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
75  ret { i32 } %.fca.0.insert
76}
77
78; R1-LABEL: test_add_v4i8_:
79; R1: addu.qb ${{[0-9]+}}
80
; Input for the addu.qb check: reinterprets both i32 arguments as <4 x i8>,
; adds them element-wise, and returns the sum reinterpreted as i32 inside a
; single-field { i32 } aggregate.
81define { i32 } @test_add_v4i8_(i32 %a.coerce, i32 %b.coerce) {
82entry:
83  %0 = bitcast i32 %a.coerce to <4 x i8>
84  %1 = bitcast i32 %b.coerce to <4 x i8>
85  %add = add <4 x i8> %0, %1
86  %2 = bitcast <4 x i8> %add to i32
  ; Wrap the i32 result in the { i32 } return type.
87  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
88  ret { i32 } %.fca.0.insert
89}
90
91; R1-LABEL: test_sub_v4i8_:
92; R1: subu.qb ${{[0-9]+}}
93
; Input for the subu.qb check: reinterprets both i32 arguments as <4 x i8>,
; subtracts %b.coerce's lanes from %a.coerce's, and returns the difference
; reinterpreted as i32 inside a single-field { i32 } aggregate.
94define { i32 } @test_sub_v4i8_(i32 %a.coerce, i32 %b.coerce) {
95entry:
96  %0 = bitcast i32 %a.coerce to <4 x i8>
97  %1 = bitcast i32 %b.coerce to <4 x i8>
98  %sub = sub <4 x i8> %0, %1
99  %2 = bitcast <4 x i8> %sub to i32
  ; Wrap the i32 result in the { i32 } return type.
100  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
101  ret { i32 } %.fca.0.insert
102}
103
104; The DSP ASE has no v4i8 multiply instruction. Check that the multiply node gets expanded.
105; R2-LABEL: test_mul_v4i8_:
106; R2: mul ${{[0-9]+}}
107; R2: mul ${{[0-9]+}}
108; R2: mul ${{[0-9]+}}
109; R2: mul ${{[0-9]+}}
110
; Element-wise <4 x i8> multiply of the two i32 arguments, returned as i32 in
; a { i32 } aggregate. The checks above expect four scalar mul instructions in
; the -mattr=dspr2 run, one per i8 lane.
111define { i32 } @test_mul_v4i8_(i32 %a.coerce, i32 %b.coerce) {
112entry:
113  %0 = bitcast i32 %a.coerce to <4 x i8>
114  %1 = bitcast i32 %b.coerce to <4 x i8>
115  %mul = mul <4 x i8> %0, %1
116  %2 = bitcast <4 x i8> %mul to i32
  ; Wrap the i32 result in the { i32 } return type.
117  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
118  ret { i32 } %.fca.0.insert
119}
120
121; R1-LABEL: test_addsc:
122; R1: addsc ${{[0-9]+}}
123; R1: addwc ${{[0-9]+}}
124
; Input for the addsc/addwc checks: a single 64-bit add (nsw) of %b and %a.
; The checks above expect addsc followed by addwc in the -mattr=dsp run.
125define i64 @test_addsc(i64 %a, i64 %b) {
126entry:
127  %add = add nsw i64 %b, %a
128  ret i64 %add
129}
130
131; R1-LABEL: shift1_v2i16_shl_:
132; R1: shll.ph ${{[0-9]+}}, ${{[0-9]+}}, 15
133
; Input for the shll.ph check: shifts each i16 lane of the argument left by
; the constant splat 15 and returns the result as i32 in a { i32 } aggregate.
134define { i32 } @shift1_v2i16_shl_(i32 %a0.coerce) {
135entry:
136  %0 = bitcast i32 %a0.coerce to <2 x i16>
137  %shl = shl <2 x i16> %0, <i16 15, i16 15>
138  %1 = bitcast <2 x i16> %shl to i32
  ; Wrap the i32 result in the { i32 } return type.
139  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
140  ret { i32 } %.fca.0.insert
141}
142
143; R1-LABEL: shift1_v2i16_sra_:
144; R1: shra.ph ${{[0-9]+}}, ${{[0-9]+}}, 15
145
; Input for the shra.ph check: arithmetic-shifts each i16 lane of the argument
; right by the constant splat 15 and returns the result as i32 in a { i32 }
; aggregate.
146define { i32 } @shift1_v2i16_sra_(i32 %a0.coerce) {
147entry:
148  %0 = bitcast i32 %a0.coerce to <2 x i16>
149  %shr = ashr <2 x i16> %0, <i16 15, i16 15>
150  %1 = bitcast <2 x i16> %shr to i32
  ; Wrap the i32 result in the { i32 } return type.
151  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
152  ret { i32 } %.fca.0.insert
153}
154
155; R1-LABEL: shift1_v2ui16_srl_:
156; R1-NOT: shrl.ph
157; R2-LABEL: shift1_v2ui16_srl_:
158; R2: shrl.ph ${{[0-9]+}}, ${{[0-9]+}}, 15
159
; Logical right shift of each i16 lane by the constant splat 15, returned as
; i32 in a { i32 } aggregate. The checks above require that shrl.ph is absent
; in the -mattr=dsp run and present with immediate 15 in the -mattr=dspr2 run.
160define { i32 } @shift1_v2ui16_srl_(i32 %a0.coerce) {
161entry:
162  %0 = bitcast i32 %a0.coerce to <2 x i16>
163  %shr = lshr <2 x i16> %0, <i16 15, i16 15>
164  %1 = bitcast <2 x i16> %shr to i32
  ; Wrap the i32 result in the { i32 } return type.
165  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
166  ret { i32 } %.fca.0.insert
167}
168
169; R1-LABEL: shift1_v4i8_shl_:
170; R1: shll.qb ${{[0-9]+}}, ${{[0-9]+}}, 7
171
; Input for the shll.qb check: shifts each i8 lane of the argument left by the
; constant splat 7 and returns the result as i32 in a { i32 } aggregate.
172define { i32 } @shift1_v4i8_shl_(i32 %a0.coerce) {
173entry:
174  %0 = bitcast i32 %a0.coerce to <4 x i8>
175  %shl = shl <4 x i8> %0, <i8 7, i8 7, i8 7, i8 7>
176  %1 = bitcast <4 x i8> %shl to i32
  ; Wrap the i32 result in the { i32 } return type.
177  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
178  ret { i32 } %.fca.0.insert
179}
180
181; R1-LABEL: shift1_v4i8_sra_:
182; R1-NOT: shra.qb
183; R2-LABEL: shift1_v4i8_sra_:
184; R2: shra.qb ${{[0-9]+}}, ${{[0-9]+}}, 7
185
; Arithmetic right shift of each i8 lane by the constant splat 7, returned as
; i32 in a { i32 } aggregate. The checks above require that shra.qb is absent
; in the -mattr=dsp run and present with immediate 7 in the -mattr=dspr2 run.
186define { i32 } @shift1_v4i8_sra_(i32 %a0.coerce) {
187entry:
188  %0 = bitcast i32 %a0.coerce to <4 x i8>
189  %shr = ashr <4 x i8> %0, <i8 7, i8 7, i8 7, i8 7>
190  %1 = bitcast <4 x i8> %shr to i32
  ; Wrap the i32 result in the { i32 } return type.
191  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
192  ret { i32 } %.fca.0.insert
193}
194
195; R1-LABEL: shift1_v4ui8_srl_:
196; R1: shrl.qb ${{[0-9]+}}, ${{[0-9]+}}, 7
197
; Input for the shrl.qb check: logical-shifts each i8 lane of the argument
; right by the constant splat 7 and returns the result as i32 in a { i32 }
; aggregate.
198define { i32 } @shift1_v4ui8_srl_(i32 %a0.coerce) {
199entry:
200  %0 = bitcast i32 %a0.coerce to <4 x i8>
201  %shr = lshr <4 x i8> %0, <i8 7, i8 7, i8 7, i8 7>
202  %1 = bitcast <4 x i8> %shr to i32
  ; Wrap the i32 result in the { i32 } return type.
203  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
204  ret { i32 } %.fca.0.insert
205}
206
207; Check that shift node is expanded if splat element size is not 16-bit.
208;
209; R1-LABEL: test_vector_splat_imm_v2q15:
210; R1-NOT: shll.ph
211
; Left shift of a <2 x i16> value whose two lanes use different constant
; amounts (0 and 2), returned as i32 in a { i32 } aggregate. The checks above
; require that shll.ph is not emitted for this input.
212define { i32 } @test_vector_splat_imm_v2q15(i32 %a.coerce) {
213entry:
214  %0 = bitcast i32 %a.coerce to <2 x i16>
  ; The shift-amount vector is not a uniform per-lane constant.
215  %shl = shl <2 x i16> %0, <i16 0, i16 2>
216  %1 = bitcast <2 x i16> %shl to i32
217  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
218  ret { i32 } %.fca.0.insert
219}
220
221; Check that shift node is expanded if splat element size is not 8-bit.
222;
223; R1-LABEL: test_vector_splat_imm_v4i8:
224; R1-NOT: shll.qb
225
; Left shift of a <4 x i8> value whose lanes use the repeating constant
; amounts 0, 2, 0, 2, returned as i32 in a { i32 } aggregate. The checks above
; require that shll.qb is not emitted for this input.
226define { i32 } @test_vector_splat_imm_v4i8(i32 %a.coerce) {
227entry:
228  %0 = bitcast i32 %a.coerce to <4 x i8>
  ; The amounts repeat with a period of two lanes, not one.
229  %shl = shl <4 x i8> %0, <i8 0, i8 2, i8 0, i8 2>
230  %1 = bitcast <4 x i8> %shl to i32
231  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
232  ret { i32 } %.fca.0.insert
233}
234
235; Check that shift node is expanded if shift amount doesn't fit in 4-bit sa field.
236;
237; R1-LABEL: test_shift_amount_v2q15:
238; R1-NOT: shll.ph
239
; Left shift of each i16 lane by the constant splat 16, returned as i32 in a
; { i32 } aggregate. The checks above require that shll.ph is not emitted for
; this input.
240define { i32 } @test_shift_amount_v2q15(i32 %a.coerce) {
241entry:
242  %0 = bitcast i32 %a.coerce to <2 x i16>
  ; The shift amount 16 equals the i16 lane width.
243  %shl = shl <2 x i16> %0, <i16 16, i16 16>
244  %1 = bitcast <2 x i16> %shl to i32
245  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
246  ret { i32 } %.fca.0.insert
247}
248
249; Check that shift node is expanded if shift amount doesn't fit in 3-bit sa field.
250;
251; R1-LABEL: test_shift_amount_v4i8:
252; R1-NOT: shll.qb
253
; Left shift of each i8 lane by the constant splat 8, returned as i32 in a
; { i32 } aggregate. The checks above require that shll.qb is not emitted for
; this input.
254define { i32 } @test_shift_amount_v4i8(i32 %a.coerce) {
255entry:
256  %0 = bitcast i32 %a.coerce to <4 x i8>
  ; The shift amount 8 equals the i8 lane width.
257  %shl = shl <4 x i8> %0, <i8 8, i8 8, i8 8, i8 8>
258  %1 = bitcast <4 x i8> %shl to i32
259  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
260  ret { i32 } %.fca.0.insert
261}
262