; Test that DAGCombiner gets helped by ComputeNumSignBitsForTargetNode() with
; vector intrinsics.
;
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s  | FileCheck %s

; Pack intrinsics used by f0-f2. Each returns a {vector, i32} pair; the i32 is
; presumably the condition code (going by the PACKS_CC test titles).
declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>)
declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>)
declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>)

; PACKS_CC: i64 -> i32
; The vector result of vpksgs on constant 0/1 operands is truncated to i16 and
; sign-extended back to i32. The check lines require the return to follow the
; pack instruction directly, so nothing may be emitted for the trunc/sext pair.
define <4 x i32> @f0() {
; CHECK-LABEL: f0:
; CHECK-LABEL: # %bb.0:
; CHECK:       vpksgs %v24, %v0, %v0
; CHECK-NEXT:  br %r14
  %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 0, i64 1>)
  ; Only the vector member (index 0) of the returned pair is used.
  %extr = extractvalue {<4 x i32>, i32} %call, 0
  %trunc = trunc <4 x i32> %extr to <4 x i16>
  %ret = sext <4 x i16> %trunc to <4 x i32>
  ret <4 x i32> %ret
}

; PACKS_CC: i32 -> i16
; The vector result of vpksfs on constant 0/1 operands is truncated to i8 and
; sign-extended back to i16. The check lines require the return to follow the
; pack instruction directly, so nothing may be emitted for the trunc/sext pair.
define <8 x i16> @f1() {
; CHECK-LABEL: f1:
; CHECK-LABEL: # %bb.0:
; CHECK:       vpksfs %v24, %v0, %v0
; CHECK-NEXT:  br %r14
  %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
                                                  <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
  ; Only the vector member (index 0) of the returned pair is used.
  %extr = extractvalue {<8 x i16>, i32} %call, 0
  %trunc = trunc <8 x i16> %extr to <8 x i8>
  %ret = sext <8 x i8> %trunc to <8 x i16>
  ret <8 x i16> %ret
}

; PACKS_CC: i16 -> i8
; The vector result of vpkshs on constant 0/1 operands is truncated to i4 and
; sign-extended back to i8. The check lines require the return to follow the
; pack instruction directly, so nothing may be emitted for the trunc/sext pair.
define <16 x i8> @f2() {
; CHECK-LABEL: f2:
; CHECK-LABEL: # %bb.0:
; CHECK:       vpkshs %v24, %v0, %v0
; CHECK-NEXT:  br %r14
  %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(
                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
  ; Only the vector member (index 0) of the returned pair is used.
  %extr = extractvalue {<16 x i8>, i32} %call, 0
  %trunc = trunc <16 x i8> %extr to <16 x i4>
  %ret = sext <16 x i4> %trunc to <16 x i8>
  ret <16 x i8> %ret
}

; Pack intrinsics used by f3-f5. Each returns a {vector, i32} pair; the i32 is
; presumably the condition code (going by the PACKLS_CC test titles).
declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>)
declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>)
declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>)

; PACKLS_CC: i64 -> i32
; The vector result of vpklsgs on two different constant 0/1 operands is
; truncated to i16 and sign-extended back to i32. The check lines require the
; return to follow the pack instruction directly, so nothing may be emitted for
; the trunc/sext pair.
define <4 x i32> @f3() {
; CHECK-LABEL: f3:
; CHECK-LABEL: # %bb.0:
; CHECK:       vpklsgs %v24, %v1, %v0
; CHECK-NEXT:  br %r14
  %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
  ; Only the vector member (index 0) of the returned pair is used.
  %extr = extractvalue {<4 x i32>, i32} %call, 0
  %trunc = trunc <4 x i32> %extr to <4 x i16>
  %ret = sext <4 x i16> %trunc to <4 x i32>
  ret <4 x i32> %ret
}

; PACKLS_CC: i32 -> i16
; The vector result of vpklsfs on constant 0/1 operands is truncated to i8 and
; sign-extended back to i16. The check lines require the return to follow the
; pack instruction directly, so nothing may be emitted for the trunc/sext pair.
define <8 x i16> @f4() {
; CHECK-LABEL: f4:
; CHECK-LABEL: # %bb.0:
; CHECK:       vpklsfs %v24, %v0, %v0
; CHECK-NEXT:  br %r14
  %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
                                                   <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
  ; Only the vector member (index 0) of the returned pair is used.
  %extr = extractvalue {<8 x i16>, i32} %call, 0
  %trunc = trunc <8 x i16> %extr to <8 x i8>
  %ret = sext <8 x i8> %trunc to <8 x i16>
  ret <8 x i16> %ret
}

; PACKLS_CC: i16 -> i8
; The vector result of vpklshs on constant 0/1 operands is truncated to i4 and
; sign-extended back to i8. The check lines require the return to follow the
; pack instruction directly, so nothing may be emitted for the trunc/sext pair.
define <16 x i8> @f5() {
; CHECK-LABEL: f5:
; CHECK-LABEL: # %bb.0:
; CHECK:       vpklshs %v24, %v0, %v0
; CHECK-NEXT:  br %r14
  %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(
                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
  ; Only the vector member (index 0) of the returned pair is used.
  %extr = extractvalue {<16 x i8>, i32} %call, 0
  %trunc = trunc <16 x i8> %extr to <16 x i4>
  %ret = sext <16 x i4> %trunc to <16 x i8>
  ret <16 x i8> %ret
}

; Pack intrinsics used by f6-f8. These variants return only the packed vector.
declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>)

; PACKS: i64 -> i32
; The result of vpksg on two different constant 0/1 operands is truncated to
; i16 and sign-extended back to i32. The check lines require the return to
; follow the pack instruction directly, so nothing may be emitted for the
; trunc/sext pair.
define <4 x i32> @f6() {
; CHECK-LABEL: f6:
; CHECK-LABEL: # %bb.0:
; CHECK:       vpksg %v24, %v1, %v0
; CHECK-NEXT:  br %r14
  %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
  %trunc = trunc <4 x i32> %call to <4 x i16>
  %ret = sext <4 x i16> %trunc to <4 x i32>
  ret <4 x i32> %ret
}

; PACKS: i32 -> i16
; The result of vpksf on constant 0/1 operands is truncated to i8 and
; sign-extended back to i16. The check lines require the return to follow the
; pack instruction directly, so nothing may be emitted for the trunc/sext pair.
define <8 x i16> @f7() {
; CHECK-LABEL: f7:
; CHECK-LABEL: # %bb.0:
; CHECK:       vpksf %v24, %v0, %v0
; CHECK-NEXT:  br %r14
  %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
                                          <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
  %trunc = trunc <8 x i16> %call to <8 x i8>
  %ret = sext <8 x i8> %trunc to <8 x i16>
  ret <8 x i16> %ret
}

; PACKS: i16 -> i8
; The result of vpksh on constant 0/1 operands is truncated to i4 and
; sign-extended back to i8. The check lines require the return to follow the
; pack instruction directly, so nothing may be emitted for the trunc/sext pair.
define <16 x i8> @f8() {
; CHECK-LABEL: f8:
; CHECK-LABEL: # %bb.0:
; CHECK:       vpksh %v24, %v0, %v0
; CHECK-NEXT:  br %r14
  %call = call <16 x i8> @llvm.s390.vpksh(
                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
  %trunc = trunc <16 x i8> %call to <16 x i4>
  %ret = sext <16 x i4> %trunc to <16 x i8>
  ret <16 x i8> %ret
}

; Pack intrinsics used by f9-f11. These variants return only the packed vector.
declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>)

; PACKLS: i64 -> i32
; The result of vpklsg on two different constant 0/1 operands is truncated to
; i16 and sign-extended back to i32. The check lines require the return to
; follow the pack instruction directly, so nothing may be emitted for the
; trunc/sext pair.
define <4 x i32> @f9() {
; CHECK-LABEL: f9:
; CHECK-LABEL: # %bb.0:
; CHECK:       vpklsg %v24, %v1, %v0
; CHECK-NEXT:  br %r14
  %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
  %trunc = trunc <4 x i32> %call to <4 x i16>
  %ret = sext <4 x i16> %trunc to <4 x i32>
  ret <4 x i32> %ret
}

; PACKLS: i32 -> i16
; The result of vpklsf on constant 0/1 operands is truncated to i8 and
; sign-extended back to i16. The check lines require the return to follow the
; pack instruction directly, so nothing may be emitted for the trunc/sext pair.
define <8 x i16> @f10() {
; CHECK-LABEL: f10:
; CHECK-LABEL: # %bb.0:
; CHECK:       vpklsf %v24, %v0, %v0
; CHECK-NEXT:  br %r14
  %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
                                           <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
  %trunc = trunc <8 x i16> %call to <8 x i8>
  %ret = sext <8 x i8> %trunc to <8 x i16>
  ret <8 x i16> %ret
}

; PACKLS: i16 -> i8
; The result of vpklsh on constant 0/1 operands is truncated to i4 and
; sign-extended back to i8. The check lines require the return to follow the
; pack instruction directly, so nothing may be emitted for the trunc/sext pair.
define <16 x i8> @f11() {
; CHECK-LABEL: f11:
; CHECK-LABEL: # %bb.0:
; CHECK:       vpklsh %v24, %v0, %v0
; CHECK-NEXT:  br %r14
  %call = call <16 x i8> @llvm.s390.vpklsh(
                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
  %trunc = trunc <16 x i8> %call to <16 x i4>
  %ret = sext <16 x i4> %trunc to <16 x i8>
  ret <16 x i8> %ret
}

; Intrinsic used by f12: two <2 x i64> operands plus an i32 operand.
declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32)

; VPDI:
; The result of vpdi (with i32 operand 0) on two different constant 0/1
; operands is truncated to i32 and sign-extended back to i64. The check lines
; require the return to follow vpdi directly, so nothing may be emitted for
; the trunc/sext pair.
define <2 x i64> @f12() {
; CHECK-LABEL: f12:
; CHECK-LABEL: # %bb.0:
; CHECK:      vpdi %v24, %v1, %v0, 0
; CHECK-NEXT: br %r14
  %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 0, i64 1>,
                                         <2 x i64> <i64 1, i64 0>, i32 0)
  %trunc = trunc <2 x i64> %perm to <2 x i32>
  %ret = sext <2 x i32> %trunc to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic used by f13: two <16 x i8> operands plus an i32 operand.
declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32)

; VSLDB:
; The result of vsldb (with i32 operand 1) on two identical constant 0/1
; vectors is truncated to i4 and sign-extended back to i8. The check lines
; require the return to follow vsldb directly, so nothing may be emitted for
; the trunc/sext pair.
define <16 x i8> @f13() {
; CHECK-LABEL: f13:
; CHECK-LABEL: # %bb.0:
; CHECK:      vsldb %v24, %v0, %v0, 1
; CHECK-NEXT: br %r14
  %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8>
                 <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
                  i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>, <16 x i8>
                 <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
                  i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
                  i32 1)
  %trunc = trunc <16 x i8> %shfd to <16 x i4>
  %ret = sext <16 x i4> %trunc to <16 x i8>
  ret <16 x i8> %ret
}

; Intrinsic used by f14: three <16 x i8> operands.
declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>)

; VPERM:
; The result of vperm on three identical constant 0/1 vectors is truncated to
; i4 and sign-extended back to i8. The check lines require the return to
; follow vperm directly, so nothing may be emitted for the trunc/sext pair.
define <16 x i8> @f14() {
; CHECK-LABEL: f14:
; CHECK-LABEL: # %bb.0:
; CHECK:      vperm %v24, %v0, %v0, %v0
; CHECK-NEXT: br %r14
  %perm = call <16 x i8> @llvm.s390.vperm(
                  <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
                             i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
                  <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
                             i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
                  <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
                             i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>)
  %trunc = trunc <16 x i8> %perm to <16 x i4>
  %ret = sext <16 x i4> %trunc to <16 x i8>
  ret <16 x i8> %ret
}
237