1; Test vector insertion of byte-swapped memory values.
2;
3; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
4
5declare i16 @llvm.bswap.i16(i16)
6declare i32 @llvm.bswap.i32(i32)
7declare i64 @llvm.bswap.i64(i64)
8declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
9declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
10declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
11
12; Test v8i16 insertion into the first element.
define <8 x i16> @f1(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vlebrh %v24, 0(%r2), 0
; CHECK: br %r14
  ; Scalar halfword load followed by a scalar byte swap ...
  %raw = load i16, i16 *%ptr
  %swapped = call i16 @llvm.bswap.i16(i16 %raw)
  ; ... written to lane 0; the expected output is one vlebrh with index 0.
  %vec = insertelement <8 x i16> %val, i16 %swapped, i32 0
  ret <8 x i16> %vec
}
22
23; Test v8i16 insertion into the last element.
define <8 x i16> @f2(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vlebrh %v24, 0(%r2), 7
; CHECK: br %r14
  ; Same load + byte swap as the first-element case.
  %raw = load i16, i16 *%ptr
  %swapped = call i16 @llvm.bswap.i16(i16 %raw)
  ; Lane 7 is the last lane of <8 x i16>; the expected vlebrh index is 7.
  %vec = insertelement <8 x i16> %val, i16 %swapped, i32 7
  ret <8 x i16> %vec
}
33
34; Test v8i16 insertion with the highest in-range offset.
define <8 x i16> @f3(<8 x i16> %val, i16 *%base) {
; CHECK-LABEL: f3:
; CHECK: vlebrh %v24, 4094(%r2), 5
; CHECK: br %r14
  ; 2047 halfwords past %base is a byte offset of 4094, which the expected
  ; output encodes directly as the displacement of vlebrh.
  %addr = getelementptr i16, i16 *%base, i32 2047
  %raw = load i16, i16 *%addr
  %swapped = call i16 @llvm.bswap.i16(i16 %raw)
  %vec = insertelement <8 x i16> %val, i16 %swapped, i32 5
  ret <8 x i16> %vec
}
45
; Test v8i16 insertion with the first out-of-range offset.
define <8 x i16> @f4(<8 x i16> %val, i16 *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: vlebrh %v24, 0(%r2), 1
; CHECK: br %r14
  ; 2048 halfwords past %base is a byte offset of 4096. The expected output
  ; adds that to the base register first and then uses displacement 0.
  %addr = getelementptr i16, i16 *%base, i32 2048
  %raw = load i16, i16 *%addr
  %swapped = call i16 @llvm.bswap.i16(i16 %raw)
  %vec = insertelement <8 x i16> %val, i16 %swapped, i32 1
  ret <8 x i16> %vec
}
58
59; Test v8i16 insertion into a variable element.
define <8 x i16> @f5(<8 x i16> %val, i16 *%ptr, i32 %index) {
; CHECK-LABEL: f5:
; CHECK-NOT: vlebrh
; CHECK: br %r14
  %raw = load i16, i16 *%ptr
  %swapped = call i16 @llvm.bswap.i16(i16 %raw)
  ; The lane number is the runtime value %index rather than a constant, and
  ; the expected output must contain no vlebrh before the return.
  %vec = insertelement <8 x i16> %val, i16 %swapped, i32 %index
  ret <8 x i16> %vec
}
69
70; Test v8i16 insertion using a pair of vector bswaps.
define <8 x i16> @f6(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vlebrh %v24, 0(%r2), 0
; CHECK: br %r14
  %raw = load i16, i16 *%ptr
  ; Swap every lane of the incoming vector, drop the unswapped loaded value
  ; into lane 0, then swap every lane again. Lanes 1-7 end up unchanged and
  ; lane 0 holds the byte-swapped load, so one vlebrh is expected.
  %pre = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
  %mid = insertelement <8 x i16> %pre, i16 %raw, i32 0
  %out = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %mid)
  ret <8 x i16> %out
}
81
82; Test v4i32 insertion into the first element.
define <4 x i32> @f7(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: vlebrf %v24, 0(%r2), 0
; CHECK: br %r14
  ; Scalar word load followed by a scalar byte swap ...
  %raw = load i32, i32 *%ptr
  %swapped = call i32 @llvm.bswap.i32(i32 %raw)
  ; ... written to lane 0; the expected output is one vlebrf with index 0.
  %vec = insertelement <4 x i32> %val, i32 %swapped, i32 0
  ret <4 x i32> %vec
}
92
93; Test v4i32 insertion into the last element.
define <4 x i32> @f8(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: vlebrf %v24, 0(%r2), 3
; CHECK: br %r14
  ; Same load + byte swap as the first-element case.
  %raw = load i32, i32 *%ptr
  %swapped = call i32 @llvm.bswap.i32(i32 %raw)
  ; Lane 3 is the last lane of <4 x i32>; the expected vlebrf index is 3.
  %vec = insertelement <4 x i32> %val, i32 %swapped, i32 3
  ret <4 x i32> %vec
}
103
104; Test v4i32 insertion with the highest in-range offset.
define <4 x i32> @f9(<4 x i32> %val, i32 *%base) {
; CHECK-LABEL: f9:
; CHECK: vlebrf %v24, 4092(%r2), 2
; CHECK: br %r14
  ; 1023 words past %base is a byte offset of 4092, which the expected
  ; output encodes directly as the displacement of vlebrf.
  %addr = getelementptr i32, i32 *%base, i32 1023
  %raw = load i32, i32 *%addr
  %swapped = call i32 @llvm.bswap.i32(i32 %raw)
  %vec = insertelement <4 x i32> %val, i32 %swapped, i32 2
  ret <4 x i32> %vec
}
115
; Test v4i32 insertion with the first out-of-range offset.
define <4 x i32> @f10(<4 x i32> %val, i32 *%base) {
; CHECK-LABEL: f10:
; CHECK: aghi %r2, 4096
; CHECK: vlebrf %v24, 0(%r2), 1
; CHECK: br %r14
  ; 1024 words past %base is a byte offset of 4096. The expected output
  ; adds that to the base register first and then uses displacement 0.
  %addr = getelementptr i32, i32 *%base, i32 1024
  %raw = load i32, i32 *%addr
  %swapped = call i32 @llvm.bswap.i32(i32 %raw)
  %vec = insertelement <4 x i32> %val, i32 %swapped, i32 1
  ret <4 x i32> %vec
}
128
129; Test v4i32 insertion into a variable element.
define <4 x i32> @f11(<4 x i32> %val, i32 *%ptr, i32 %index) {
; CHECK-LABEL: f11:
; CHECK-NOT: vlebrf
; CHECK: br %r14
  %raw = load i32, i32 *%ptr
  %swapped = call i32 @llvm.bswap.i32(i32 %raw)
  ; The lane number is the runtime value %index rather than a constant, and
  ; the expected output must contain no vlebrf before the return.
  %vec = insertelement <4 x i32> %val, i32 %swapped, i32 %index
  ret <4 x i32> %vec
}
139
140; Test v4i32 insertion using a pair of vector bswaps.
define <4 x i32> @f12(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f12:
; CHECK: vlebrf %v24, 0(%r2), 0
; CHECK: br %r14
  %raw = load i32, i32 *%ptr
  ; Swap every lane of the incoming vector, drop the unswapped loaded value
  ; into lane 0, then swap every lane again. Lanes 1-3 end up unchanged and
  ; lane 0 holds the byte-swapped load, so one vlebrf is expected.
  %pre = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
  %mid = insertelement <4 x i32> %pre, i32 %raw, i32 0
  %out = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %mid)
  ret <4 x i32> %out
}
151
152; Test v2i64 insertion into the first element.
define <2 x i64> @f13(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f13:
; CHECK: vlebrg %v24, 0(%r2), 0
; CHECK: br %r14
  ; Scalar doubleword load followed by a scalar byte swap ...
  %raw = load i64, i64 *%ptr
  %swapped = call i64 @llvm.bswap.i64(i64 %raw)
  ; ... written to lane 0; the expected output is one vlebrg with index 0.
  %vec = insertelement <2 x i64> %val, i64 %swapped, i32 0
  ret <2 x i64> %vec
}
162
163; Test v2i64 insertion into the last element.
define <2 x i64> @f14(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f14:
; CHECK: vlebrg %v24, 0(%r2), 1
; CHECK: br %r14
  ; Same load + byte swap as the first-element case.
  %raw = load i64, i64 *%ptr
  %swapped = call i64 @llvm.bswap.i64(i64 %raw)
  ; Lane 1 is the last lane of <2 x i64>; the expected vlebrg index is 1.
  %vec = insertelement <2 x i64> %val, i64 %swapped, i32 1
  ret <2 x i64> %vec
}
173
174; Test v2i64 insertion with the highest in-range offset.
define <2 x i64> @f15(<2 x i64> %val, i64 *%base) {
; CHECK-LABEL: f15:
; CHECK: vlebrg %v24, 4088(%r2), 1
; CHECK: br %r14
  ; 511 doublewords past %base is a byte offset of 4088, which the expected
  ; output encodes directly as the displacement of vlebrg.
  %addr = getelementptr i64, i64 *%base, i32 511
  %raw = load i64, i64 *%addr
  %swapped = call i64 @llvm.bswap.i64(i64 %raw)
  %vec = insertelement <2 x i64> %val, i64 %swapped, i32 1
  ret <2 x i64> %vec
}
185
; Test v2i64 insertion with the first out-of-range offset.
define <2 x i64> @f16(<2 x i64> %val, i64 *%base) {
; CHECK-LABEL: f16:
; CHECK: aghi %r2, 4096
; CHECK: vlebrg %v24, 0(%r2), 0
; CHECK: br %r14
  ; 512 doublewords past %base is a byte offset of 4096. The expected output
  ; adds that to the base register first and then uses displacement 0.
  %addr = getelementptr i64, i64 *%base, i32 512
  %raw = load i64, i64 *%addr
  %swapped = call i64 @llvm.bswap.i64(i64 %raw)
  %vec = insertelement <2 x i64> %val, i64 %swapped, i32 0
  ret <2 x i64> %vec
}
198
199; Test v2i64 insertion into a variable element.
define <2 x i64> @f17(<2 x i64> %val, i64 *%ptr, i32 %index) {
; CHECK-LABEL: f17:
; CHECK-NOT: vlebrg
; CHECK: br %r14
  %raw = load i64, i64 *%ptr
  %swapped = call i64 @llvm.bswap.i64(i64 %raw)
  ; The lane number is the runtime value %index rather than a constant, and
  ; the expected output must contain no vlebrg before the return.
  %vec = insertelement <2 x i64> %val, i64 %swapped, i32 %index
  ret <2 x i64> %vec
}
209
210; Test v2i64 insertion using a pair of vector bswaps.
define <2 x i64> @f18(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f18:
; CHECK: vlebrg %v24, 0(%r2), 0
; CHECK: br %r14
  %raw = load i64, i64 *%ptr
  ; Swap every lane of the incoming vector, drop the unswapped loaded value
  ; into lane 0, then swap every lane again. Lane 1 ends up unchanged and
  ; lane 0 holds the byte-swapped load, so one vlebrg is expected.
  %pre = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
  %mid = insertelement <2 x i64> %pre, i64 %raw, i32 0
  %out = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %mid)
  ret <2 x i64> %out
}
221