; Test vector extraction of byte-swapped value to memory.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s

; Byte-swap intrinsics used by the tests below.  The scalar forms are applied
; to an element after it has been extracted; the vector forms are applied to
; the whole vector before an element is extracted.
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

; Test v8i16 extraction from the first element.
; Lane 0 is extracted, byte-swapped as a scalar and stored through %ptr; the
; expected output contains vstebrh with element index 0 and displacement 0.
define void @f1(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vstebrh %v24, 0(%r2), 0
; CHECK: br %r14
  %element = extractelement <8 x i16> %val, i32 0
  %swap = call i16 @llvm.bswap.i16(i16 %element)
  store i16 %swap, i16 *%ptr
  ret void
}

; Test v8i16 extraction from the last element.
; Same shape as the first-element case, but with lane 7, the highest valid
; index of an <8 x i16>; the expected vstebrh carries element index 7.
define void @f2(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vstebrh %v24, 0(%r2), 7
; CHECK: br %r14
  %element = extractelement <8 x i16> %val, i32 7
  %swap = call i16 @llvm.bswap.i16(i16 %element)
  store i16 %swap, i16 *%ptr
  ret void
}

; Test v8i16 extraction of an invalid element.  This must compile,
; but we don't care what it does.
; Index 8 is one past the last lane of an <8 x i16>.  The only constraint on
; the output is the negative match below: no vstebrh of that exact form with
; element index 8 may appear before the return.
define void @f3(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: vstebrh %v24, 0(%r2), 8
; CHECK: br %r14
  %element = extractelement <8 x i16> %val, i32 8
  %swap = call i16 @llvm.bswap.i16(i16 %element)
  store i16 %swap, i16 *%ptr
  ret void
}

; Test v8i16 extraction with the highest in-range offset.
; The store address is %base + 2047 i16 elements = 4094 bytes, which the
; expected vstebrh encodes directly as its displacement.
define void @f4(<8 x i16> %val, i16 *%base) {
; CHECK-LABEL: f4:
; CHECK: vstebrh %v24, 4094(%r2), 5
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i32 2047
  %element = extractelement <8 x i16> %val, i32 5
  %swap = call i16 @llvm.bswap.i16(i16 %element)
  store i16 %swap, i16 *%ptr
  ret void
}

; Test v8i16 extraction with the first out-of-range offset.
; The store address is %base + 2048 i16 elements = 4096 bytes.  The expected
; output adds 4096 to the base register with aghi and then uses vstebrh with
; displacement 0 instead of folding the offset into the store.
define void @f5(<8 x i16> %val, i16 *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, 4096
; CHECK: vstebrh %v24, 0(%r2), 1
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i32 2048
  %element = extractelement <8 x i16> %val, i32 1
  %swap = call i16 @llvm.bswap.i16(i16 %element)
  store i16 %swap, i16 *%ptr
  ret void
}

; Test v8i16 extraction from a variable element.
; The lane number comes from the %index argument rather than a constant, and
; the expected output must not contain vstebrh anywhere before the return.
define void @f6(<8 x i16> %val, i16 *%ptr, i32 %index) {
; CHECK-LABEL: f6:
; CHECK-NOT: vstebrh
; CHECK: br %r14
  %element = extractelement <8 x i16> %val, i32 %index
  %swap = call i16 @llvm.bswap.i16(i16 %element)
  store i16 %swap, i16 *%ptr
  ret void
}

; Test v8i16 extraction using a vector bswap.
; Here the whole vector is byte-swapped first and lane 0 is extracted from
; the result; the expected output is the same vstebrh form as when the swap
; is applied to the extracted scalar.
define void @f7(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: vstebrh %v24, 0(%r2), 0
; CHECK: br %r14
  %swap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
  %element = extractelement <8 x i16> %swap, i32 0
  store i16 %element, i16 *%ptr
  ret void
}

; Test v4i32 extraction from the first element.
; Lane 0 is extracted, byte-swapped as a scalar and stored through %ptr; the
; expected output contains vstebrf with element index 0 and displacement 0.
define void @f8(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: vstebrf %v24, 0(%r2), 0
; CHECK: br %r14
  %element = extractelement <4 x i32> %val, i32 0
  %swap = call i32 @llvm.bswap.i32(i32 %element)
  store i32 %swap, i32 *%ptr
  ret void
}

; Test v4i32 extraction from the last element.
; Same shape as the first-element case, but with lane 3, the highest valid
; index of a <4 x i32>; the expected vstebrf carries element index 3.
define void @f9(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f9:
; CHECK: vstebrf %v24, 0(%r2), 3
; CHECK: br %r14
  %element = extractelement <4 x i32> %val, i32 3
  %swap = call i32 @llvm.bswap.i32(i32 %element)
  store i32 %swap, i32 *%ptr
  ret void
}

; Test v4i32 extraction of an invalid element.  This must compile,
; but we don't care what it does.
; Index 4 is one past the last lane of a <4 x i32>.  The only constraint on
; the output is the negative match below: no vstebrf of that exact form with
; element index 4 may appear before the return.
define void @f10(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f10:
; CHECK-NOT: vstebrf %v24, 0(%r2), 4
; CHECK: br %r14
  %element = extractelement <4 x i32> %val, i32 4
  %swap = call i32 @llvm.bswap.i32(i32 %element)
  store i32 %swap, i32 *%ptr
  ret void
}

; Test v4i32 extraction with the highest in-range offset.
; The store address is %base + 1023 i32 elements = 4092 bytes, which the
; expected vstebrf encodes directly as its displacement.
define void @f11(<4 x i32> %val, i32 *%base) {
; CHECK-LABEL: f11:
; CHECK: vstebrf %v24, 4092(%r2), 2
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i32 1023
  %element = extractelement <4 x i32> %val, i32 2
  %swap = call i32 @llvm.bswap.i32(i32 %element)
  store i32 %swap, i32 *%ptr
  ret void
}

; Test v4i32 extraction with the first out-of-range offset.
; The store address is %base + 1024 i32 elements = 4096 bytes.  The expected
; output adds 4096 to the base register with aghi and then uses vstebrf with
; displacement 0 instead of folding the offset into the store.
define void @f12(<4 x i32> %val, i32 *%base) {
; CHECK-LABEL: f12:
; CHECK: aghi %r2, 4096
; CHECK: vstebrf %v24, 0(%r2), 1
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i32 1024
  %element = extractelement <4 x i32> %val, i32 1
  %swap = call i32 @llvm.bswap.i32(i32 %element)
  store i32 %swap, i32 *%ptr
  ret void
}

; Test v4i32 extraction from a variable element.
; The lane number comes from the %index argument rather than a constant, and
; the expected output must not contain vstebrf anywhere before the return.
define void @f13(<4 x i32> %val, i32 *%ptr, i32 %index) {
; CHECK-LABEL: f13:
; CHECK-NOT: vstebrf
; CHECK: br %r14
  %element = extractelement <4 x i32> %val, i32 %index
  %swap = call i32 @llvm.bswap.i32(i32 %element)
  store i32 %swap, i32 *%ptr
  ret void
}

; Test v4i32 extraction using a vector bswap.
; Here the whole vector is byte-swapped first and lane 0 is extracted from
; the result; the expected output is the same vstebrf form as when the swap
; is applied to the extracted scalar.
define void @f14(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f14:
; CHECK: vstebrf %v24, 0(%r2), 0
; CHECK: br %r14
  %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
  %element = extractelement <4 x i32> %swap, i32 0
  store i32 %element, i32 *%ptr
  ret void
}

; Test v2i64 extraction from the first element.
; Lane 0 is extracted, byte-swapped as a scalar and stored through %ptr; the
; expected output contains vstebrg with element index 0 and displacement 0.
define void @f15(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f15:
; CHECK: vstebrg %v24, 0(%r2), 0
; CHECK: br %r14
  %element = extractelement <2 x i64> %val, i32 0
  %swap = call i64 @llvm.bswap.i64(i64 %element)
  store i64 %swap, i64 *%ptr
  ret void
}

; Test v2i64 extraction from the last element.
; Same shape as the first-element case, but with lane 1, the highest valid
; index of a <2 x i64>; the expected vstebrg carries element index 1.
define void @f16(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f16:
; CHECK: vstebrg %v24, 0(%r2), 1
; CHECK: br %r14
  %element = extractelement <2 x i64> %val, i32 1
  %swap = call i64 @llvm.bswap.i64(i64 %element)
  store i64 %swap, i64 *%ptr
  ret void
}

; Test v2i64 extraction of an invalid element.  This must compile,
; but we don't care what it does.
; Index 2 is one past the last lane of a <2 x i64>.  The only constraint on
; the output is the negative match below: no vstebrg of that exact form with
; element index 2 may appear before the return.
define void @f17(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f17:
; CHECK-NOT: vstebrg %v24, 0(%r2), 2
; CHECK: br %r14
  %element = extractelement <2 x i64> %val, i32 2
  %swap = call i64 @llvm.bswap.i64(i64 %element)
  store i64 %swap, i64 *%ptr
  ret void
}

; Test v2i64 extraction with the highest in-range offset.
; The store address is %base + 511 i64 elements = 4088 bytes, which the
; expected vstebrg encodes directly as its displacement.
define void @f18(<2 x i64> %val, i64 *%base) {
; CHECK-LABEL: f18:
; CHECK: vstebrg %v24, 4088(%r2), 1
; CHECK: br %r14
  %ptr = getelementptr i64, i64 *%base, i32 511
  %element = extractelement <2 x i64> %val, i32 1
  %swap = call i64 @llvm.bswap.i64(i64 %element)
  store i64 %swap, i64 *%ptr
  ret void
}

; Test v2i64 extraction with the first out-of-range offset.
; The store address is %base + 512 i64 elements = 4096 bytes.  The expected
; output adds 4096 to the base register with aghi and then uses vstebrg with
; displacement 0 instead of folding the offset into the store.
define void @f19(<2 x i64> %val, i64 *%base) {
; CHECK-LABEL: f19:
; CHECK: aghi %r2, 4096
; CHECK: vstebrg %v24, 0(%r2), 0
; CHECK: br %r14
  %ptr = getelementptr i64, i64 *%base, i32 512
  %element = extractelement <2 x i64> %val, i32 0
  %swap = call i64 @llvm.bswap.i64(i64 %element)
  store i64 %swap, i64 *%ptr
  ret void
}

; Test v2i64 extraction from a variable element.
; The lane number comes from the %index argument rather than a constant, and
; the expected output must not contain vstebrg anywhere before the return.
define void @f20(<2 x i64> %val, i64 *%ptr, i32 %index) {
; CHECK-LABEL: f20:
; CHECK-NOT: vstebrg
; CHECK: br %r14
  %element = extractelement <2 x i64> %val, i32 %index
  %swap = call i64 @llvm.bswap.i64(i64 %element)
  store i64 %swap, i64 *%ptr
  ret void
}

; Test v2i64 extraction using a vector bswap.
; Here the whole vector is byte-swapped first and lane 0 is extracted from
; the result; the expected output is the same vstebrg form as when the swap
; is applied to the extracted scalar.
define void @f21(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f21:
; CHECK: vstebrg %v24, 0(%r2), 0
; CHECK: br %r14
  %swap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
  %element = extractelement <2 x i64> %swap, i32 0
  store i64 %element, i64 *%ptr
  ret void
}
