1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --force-update
2 // REQUIRES: webassembly-registered-target, asserts
3
4 // FIXME: This should not be using -O2 and implicitly testing the entire IR opt pipeline.
5
6 // RUN: %clang %s -O2 -emit-llvm -S -o - -target wasm32-unknown-unknown -msimd128 -Wcast-qual -fno-lax-vector-conversions -Werror | FileCheck %s
7
8 #include <wasm_simd128.h>
9
10 // CHECK-LABEL: @test_v128_load(
11 // CHECK-NEXT: entry:
12 // CHECK-NEXT: [[__V_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i32>*
13 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[__V_I]], align 1, !tbaa [[TBAA2:![0-9]+]]
14 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
15 //
test_v128_load(const void * mem)16 v128_t test_v128_load(const void *mem) {
17 return wasm_v128_load(mem);
18 }
19
20 // CHECK-LABEL: @test_v128_load8_splat(
21 // CHECK-NEXT: entry:
22 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
23 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
24 // CHECK-NEXT: [[VECINIT16_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer
25 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VECINIT16_I]] to <4 x i32>
26 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
27 //
test_v128_load8_splat(const void * mem)28 v128_t test_v128_load8_splat(const void *mem) {
29 return wasm_v128_load8_splat(mem);
30 }
31
32 // CHECK-LABEL: @test_v128_load16_splat(
33 // CHECK-NEXT: entry:
34 // CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i16*
35 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__V1_I]], align 1, !tbaa [[TBAA2]]
36 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[TMP0]], i32 0
37 // CHECK-NEXT: [[VECINIT8_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
38 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT8_I]] to <4 x i32>
39 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
40 //
test_v128_load16_splat(const void * mem)41 v128_t test_v128_load16_splat(const void *mem) {
42 return wasm_v128_load16_splat(mem);
43 }
44
45 // CHECK-LABEL: @test_v128_load32_splat(
46 // CHECK-NEXT: entry:
47 // CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i32*
48 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[__V1_I]], align 1, !tbaa [[TBAA2]]
49 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[TMP0]], i32 0
50 // CHECK-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
51 // CHECK-NEXT: ret <4 x i32> [[VECINIT4_I]]
52 //
test_v128_load32_splat(const void * mem)53 v128_t test_v128_load32_splat(const void *mem) {
54 return wasm_v128_load32_splat(mem);
55 }
56
57 // CHECK-LABEL: @test_v128_load64_splat(
58 // CHECK-NEXT: entry:
59 // CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i64*
60 // CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[__V1_I]], align 1, !tbaa [[TBAA2]]
61 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[TMP0]], i32 0
62 // CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
63 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32>
64 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
65 //
test_v128_load64_splat(const void * mem)66 v128_t test_v128_load64_splat(const void *mem) {
67 return wasm_v128_load64_splat(mem);
68 }
69
70 // CHECK-LABEL: @test_i16x8_load8x8(
71 // CHECK-NEXT: entry:
72 // CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <8 x i8>*
73 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
74 // CHECK-NEXT: [[CONV_I:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16>
75 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
76 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
77 //
test_i16x8_load8x8(const void * mem)78 v128_t test_i16x8_load8x8(const void *mem) {
79 return wasm_i16x8_load8x8(mem);
80 }
81
82 // CHECK-LABEL: @test_u16x8_load8x8(
83 // CHECK-NEXT: entry:
84 // CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <8 x i8>*
85 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
86 // CHECK-NEXT: [[CONV_I:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
87 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
88 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
89 //
test_u16x8_load8x8(const void * mem)90 v128_t test_u16x8_load8x8(const void *mem) {
91 return wasm_u16x8_load8x8(mem);
92 }
93
94 // CHECK-LABEL: @test_i32x4_load16x4(
95 // CHECK-NEXT: entry:
96 // CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i16>*
97 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, <4 x i16>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
98 // CHECK-NEXT: [[CONV_I:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
99 // CHECK-NEXT: ret <4 x i32> [[CONV_I]]
100 //
test_i32x4_load16x4(const void * mem)101 v128_t test_i32x4_load16x4(const void *mem) {
102 return wasm_i32x4_load16x4(mem);
103 }
104
105 // CHECK-LABEL: @test_u32x4_load16x4(
106 // CHECK-NEXT: entry:
107 // CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i16>*
108 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, <4 x i16>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
109 // CHECK-NEXT: [[CONV_I:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
110 // CHECK-NEXT: ret <4 x i32> [[CONV_I]]
111 //
test_u32x4_load16x4(const void * mem)112 v128_t test_u32x4_load16x4(const void *mem) {
113 return wasm_u32x4_load16x4(mem);
114 }
115
116 // CHECK-LABEL: @test_i64x2_load32x2(
117 // CHECK-NEXT: entry:
118 // CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <2 x i32>*
119 // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, <2 x i32>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
120 // CHECK-NEXT: [[CONV_I:%.*]] = sext <2 x i32> [[TMP0]] to <2 x i64>
121 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
122 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
123 //
test_i64x2_load32x2(const void * mem)124 v128_t test_i64x2_load32x2(const void *mem) {
125 return wasm_i64x2_load32x2(mem);
126 }
127
128 // CHECK-LABEL: @test_u64x2_load32x2(
129 // CHECK-NEXT: entry:
130 // CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <2 x i32>*
131 // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, <2 x i32>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
132 // CHECK-NEXT: [[CONV_I:%.*]] = zext <2 x i32> [[TMP0]] to <2 x i64>
133 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
134 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
135 //
test_u64x2_load32x2(const void * mem)136 v128_t test_u64x2_load32x2(const void *mem) {
137 return wasm_u64x2_load32x2(mem);
138 }
139
140 // CHECK-LABEL: @test_v128_load32_zero(
141 // CHECK-NEXT: entry:
142 // CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i32*
143 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[__V1_I]], align 1, !tbaa [[TBAA2]]
144 // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[TMP0]], i32 0
145 // CHECK-NEXT: ret <4 x i32> [[VECINIT4_I]]
146 //
test_v128_load32_zero(const void * mem)147 v128_t test_v128_load32_zero(const void *mem) {
148 return wasm_v128_load32_zero(mem);
149 }
150
151 // CHECK-LABEL: @test_v128_load64_zero(
152 // CHECK-NEXT: entry:
153 // CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i64*
154 // CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[__V1_I]], align 1, !tbaa [[TBAA2]]
155 // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
156 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32>
157 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
158 //
test_v128_load64_zero(const void * mem)159 v128_t test_v128_load64_zero(const void *mem) {
160 return wasm_v128_load64_zero(mem);
161 }
162
163 // CHECK-LABEL: @test_v128_load8_lane(
164 // CHECK-NEXT: entry:
165 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
166 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
167 // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[TMP0]], i32 15
168 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
169 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
170 //
test_v128_load8_lane(const uint8_t * ptr,v128_t vec)171 v128_t test_v128_load8_lane(const uint8_t *ptr, v128_t vec) {
172 return wasm_v128_load8_lane(ptr, vec, 15);
173 }
174
175 // CHECK-LABEL: @test_v128_load16_lane(
176 // CHECK-NEXT: entry:
177 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
178 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
179 // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP0]], i32 7
180 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
181 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
182 //
test_v128_load16_lane(const uint16_t * ptr,v128_t vec)183 v128_t test_v128_load16_lane(const uint16_t *ptr, v128_t vec) {
184 return wasm_v128_load16_lane(ptr, vec, 7);
185 }
186
187 // CHECK-LABEL: @test_v128_load32_lane(
188 // CHECK-NEXT: entry:
189 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
190 // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[VEC:%.*]], i32 [[TMP0]], i32 3
191 // CHECK-NEXT: ret <4 x i32> [[VECINS_I]]
192 //
test_v128_load32_lane(const uint32_t * ptr,v128_t vec)193 v128_t test_v128_load32_lane(const uint32_t *ptr, v128_t vec) {
194 return wasm_v128_load32_lane(ptr, vec, 3);
195 }
196
197 // CHECK-LABEL: @test_v128_load64_lane(
198 // CHECK-NEXT: entry:
199 // CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
200 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
201 // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP0]], i32 1
202 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
203 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
204 //
test_v128_load64_lane(const uint64_t * ptr,v128_t vec)205 v128_t test_v128_load64_lane(const uint64_t *ptr, v128_t vec) {
206 return wasm_v128_load64_lane(ptr, vec, 1);
207 }
208
209 // CHECK-LABEL: @test_v128_store(
210 // CHECK-NEXT: entry:
211 // CHECK-NEXT: [[__V_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i32>*
212 // CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[__V_I]], align 1, !tbaa [[TBAA2]]
213 // CHECK-NEXT: ret void
214 //
test_v128_store(void * mem,v128_t a)215 void test_v128_store(void *mem, v128_t a) {
216 return wasm_v128_store(mem, a);
217 }
218
219 // CHECK-LABEL: @test_v128_store8_lane(
220 // CHECK-NEXT: entry:
221 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
222 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i32 15
223 // CHECK-NEXT: store i8 [[VECEXT_I]], i8* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
224 // CHECK-NEXT: ret void
225 //
test_v128_store8_lane(uint8_t * ptr,v128_t vec)226 void test_v128_store8_lane(uint8_t *ptr, v128_t vec) {
227 return wasm_v128_store8_lane(ptr, vec, 15);
228 }
229
230 // CHECK-LABEL: @test_v128_store16_lane(
231 // CHECK-NEXT: entry:
232 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
233 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7
234 // CHECK-NEXT: store i16 [[VECEXT_I]], i16* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
235 // CHECK-NEXT: ret void
236 //
test_v128_store16_lane(uint16_t * ptr,v128_t vec)237 void test_v128_store16_lane(uint16_t *ptr, v128_t vec) {
238 return wasm_v128_store16_lane(ptr, vec, 7);
239 }
240
241 // CHECK-LABEL: @test_v128_store32_lane(
242 // CHECK-NEXT: entry:
243 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[VEC:%.*]], i32 3
244 // CHECK-NEXT: store i32 [[VECEXT_I]], i32* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
245 // CHECK-NEXT: ret void
246 //
test_v128_store32_lane(uint32_t * ptr,v128_t vec)247 void test_v128_store32_lane(uint32_t *ptr, v128_t vec) {
248 return wasm_v128_store32_lane(ptr, vec, 3);
249 }
250
251 // CHECK-LABEL: @test_v128_store64_lane(
252 // CHECK-NEXT: entry:
253 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
254 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
255 // CHECK-NEXT: store i64 [[VECEXT_I]], i64* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
256 // CHECK-NEXT: ret void
257 //
test_v128_store64_lane(uint64_t * ptr,v128_t vec)258 void test_v128_store64_lane(uint64_t *ptr, v128_t vec) {
259 return wasm_v128_store64_lane(ptr, vec, 1);
260 }
261
262 // CHECK-LABEL: @test_i8x16_make(
263 // CHECK-NEXT: entry:
264 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[C0:%.*]], i32 0
265 // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1:%.*]], i32 1
266 // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2:%.*]], i32 2
267 // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3:%.*]], i32 3
268 // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4:%.*]], i32 4
269 // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5:%.*]], i32 5
270 // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6:%.*]], i32 6
271 // CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7:%.*]], i32 7
272 // CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8:%.*]], i32 8
273 // CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9:%.*]], i32 9
274 // CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10:%.*]], i32 10
275 // CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11:%.*]], i32 11
276 // CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12:%.*]], i32 12
277 // CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13:%.*]], i32 13
278 // CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14:%.*]], i32 14
279 // CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15:%.*]], i32 15
280 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
281 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
282 //
test_i8x16_make(int8_t c0,int8_t c1,int8_t c2,int8_t c3,int8_t c4,int8_t c5,int8_t c6,int8_t c7,int8_t c8,int8_t c9,int8_t c10,int8_t c11,int8_t c12,int8_t c13,int8_t c14,int8_t c15)283 v128_t test_i8x16_make(int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, int8_t c6, int8_t c7, int8_t c8, int8_t c9, int8_t c10, int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) {
284 return wasm_i8x16_make(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15);
285 }
286
287 // CHECK-LABEL: @test_i16x8_make(
288 // CHECK-NEXT: entry:
289 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[C0:%.*]], i32 0
290 // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1:%.*]], i32 1
291 // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2:%.*]], i32 2
292 // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3:%.*]], i32 3
293 // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4:%.*]], i32 4
294 // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5:%.*]], i32 5
295 // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6:%.*]], i32 6
296 // CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7:%.*]], i32 7
297 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
298 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
299 //
test_i16x8_make(int16_t c0,int16_t c1,int16_t c2,int16_t c3,int16_t c4,int16_t c5,int16_t c6,int16_t c7)300 v128_t test_i16x8_make(int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c4, int16_t c5, int16_t c6, int16_t c7) {
301 return wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7);
302 }
303
304 // CHECK-LABEL: @test_i32x4_make(
305 // CHECK-NEXT: entry:
306 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[C0:%.*]], i32 0
307 // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1:%.*]], i32 1
308 // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2:%.*]], i32 2
309 // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3:%.*]], i32 3
310 // CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
311 //
test_i32x4_make(int32_t c0,int32_t c1,int32_t c2,int32_t c3)312 v128_t test_i32x4_make(int32_t c0, int32_t c1, int32_t c2, int32_t c3) {
313 return wasm_i32x4_make(c0, c1, c2, c3);
314 }
315
316 // CHECK-LABEL: @test_i64x2_make(
317 // CHECK-NEXT: entry:
318 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[C0:%.*]], i32 0
319 // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1:%.*]], i32 1
320 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
321 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
322 //
test_i64x2_make(int64_t c0,int64_t c1)323 v128_t test_i64x2_make(int64_t c0, int64_t c1) {
324 return wasm_i64x2_make(c0, c1);
325 }
326
327 // CHECK-LABEL: @test_f32x4_make(
328 // CHECK-NEXT: entry:
329 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[C0:%.*]], i32 0
330 // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[C1:%.*]], i32 1
331 // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[C2:%.*]], i32 2
332 // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[C3:%.*]], i32 3
333 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <4 x i32>
334 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
335 //
test_f32x4_make(float c0,float c1,float c2,float c3)336 v128_t test_f32x4_make(float c0, float c1, float c2, float c3) {
337 return wasm_f32x4_make(c0, c1, c2, c3);
338 }
339
340 // CHECK-LABEL: @test_f64x2_make(
341 // CHECK-NEXT: entry:
342 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double [[C0:%.*]], i32 0
343 // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[C1:%.*]], i32 1
344 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32>
345 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
346 //
test_f64x2_make(double c0,double c1)347 v128_t test_f64x2_make(double c0, double c1) {
348 return wasm_f64x2_make(c0, c1);
349 }
350
351 // CHECK-LABEL: @test_i8x16_const(
352 // CHECK-NEXT: entry:
353 // CHECK-NEXT: ret <4 x i32> <i32 50462976, i32 117835012, i32 185207048, i32 252579084>
354 //
test_i8x16_const()355 v128_t test_i8x16_const() {
356 return wasm_i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
357 }
358
359 // CHECK-LABEL: @test_i16x8_const(
360 // CHECK-NEXT: entry:
361 // CHECK-NEXT: ret <4 x i32> <i32 65536, i32 196610, i32 327684, i32 458758>
362 //
test_i16x8_const()363 v128_t test_i16x8_const() {
364 return wasm_i16x8_const(0, 1, 2, 3, 4, 5, 6, 7);
365 }
366
367 // CHECK-LABEL: @test_i32x4_const(
368 // CHECK-NEXT: entry:
369 // CHECK-NEXT: ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
370 //
test_i32x4_const()371 v128_t test_i32x4_const() {
372 return wasm_i32x4_const(0, 1, 2, 3);
373 }
374
375 // CHECK-LABEL: @test_i64x2_const(
376 // CHECK-NEXT: entry:
377 // CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 1, i32 0>
378 //
test_i64x2_const()379 v128_t test_i64x2_const() {
380 return wasm_i64x2_const(0, 1);
381 }
382
383 // CHECK-LABEL: @test_f32x4_const(
384 // CHECK-NEXT: entry:
385 // CHECK-NEXT: ret <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>
386 //
test_f32x4_const()387 v128_t test_f32x4_const() {
388 return wasm_f32x4_const(0, 1, 2, 3);
389 }
390
391 // CHECK-LABEL: @test_f64x2_const(
392 // CHECK-NEXT: entry:
393 // CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 1072693248>
394 //
test_f64x2_const()395 v128_t test_f64x2_const() {
396 return wasm_f64x2_const(0, 1);
397 }
398
399 // CHECK-LABEL: @test_i8x16_const_splat(
400 // CHECK-NEXT: entry:
401 // CHECK-NEXT: ret <4 x i32> <i32 707406378, i32 707406378, i32 707406378, i32 707406378>
402 //
test_i8x16_const_splat()403 v128_t test_i8x16_const_splat() {
404 return wasm_i8x16_const_splat(42);
405 }
406
407 // CHECK-LABEL: @test_i16x8_const_splat(
408 // CHECK-NEXT: entry:
409 // CHECK-NEXT: ret <4 x i32> <i32 2752554, i32 2752554, i32 2752554, i32 2752554>
410 //
test_i16x8_const_splat()411 v128_t test_i16x8_const_splat() {
412 return wasm_i16x8_const_splat(42);
413 }
414
415 // CHECK-LABEL: @test_i32x4_const_splat(
416 // CHECK-NEXT: entry:
417 // CHECK-NEXT: ret <4 x i32> <i32 42, i32 42, i32 42, i32 42>
418 //
test_i32x4_const_splat()419 v128_t test_i32x4_const_splat() {
420 return wasm_i32x4_const_splat(42);
421 }
422
423 // CHECK-LABEL: @test_i64x2_const_splat(
424 // CHECK-NEXT: entry:
425 // CHECK-NEXT: ret <4 x i32> <i32 42, i32 0, i32 42, i32 0>
426 //
test_i64x2_const_splat()427 v128_t test_i64x2_const_splat() {
428 return wasm_i64x2_const_splat(42);
429 }
430
431 // CHECK-LABEL: @test_f32x4_const_splat(
432 // CHECK-NEXT: entry:
433 // CHECK-NEXT: ret <4 x i32> <i32 1109917696, i32 1109917696, i32 1109917696, i32 1109917696>
434 //
test_f32x4_const_splat()435 v128_t test_f32x4_const_splat() {
436 return wasm_f32x4_const_splat(42);
437 }
438
439 // CHECK-LABEL: @test_f64x2_const_splat(
440 // CHECK-NEXT: entry:
441 // CHECK-NEXT: ret <4 x i32> <i32 0, i32 1078263808, i32 0, i32 1078263808>
442 //
test_f64x2_const_splat()443 v128_t test_f64x2_const_splat() {
444 return wasm_f64x2_const_splat(42);
445 }
446
447 // CHECK-LABEL: @test_i8x16_splat(
448 // CHECK-NEXT: entry:
449 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0
450 // CHECK-NEXT: [[VECINIT15_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer
451 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
452 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
453 //
test_i8x16_splat(int8_t a)454 v128_t test_i8x16_splat(int8_t a) {
455 return wasm_i8x16_splat(a);
456 }
457
458 // CHECK-LABEL: @test_i8x16_extract_lane(
459 // CHECK-NEXT: entry:
460 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
461 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i32 15
462 // CHECK-NEXT: ret i8 [[VECEXT_I]]
463 //
test_i8x16_extract_lane(v128_t a)464 int8_t test_i8x16_extract_lane(v128_t a) {
465 return wasm_i8x16_extract_lane(a, 15);
466 }
467
468 // CHECK-LABEL: @test_u8x16_extract_lane(
469 // CHECK-NEXT: entry:
470 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
471 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i32 15
472 // CHECK-NEXT: ret i8 [[VECEXT_I]]
473 //
test_u8x16_extract_lane(v128_t a)474 uint8_t test_u8x16_extract_lane(v128_t a) {
475 return wasm_u8x16_extract_lane(a, 15);
476 }
477
478 // CHECK-LABEL: @test_i8x16_replace_lane(
479 // CHECK-NEXT: entry:
480 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
481 // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B:%.*]], i32 15
482 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
483 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
484 //
test_i8x16_replace_lane(v128_t a,int8_t b)485 v128_t test_i8x16_replace_lane(v128_t a, int8_t b) {
486 return wasm_i8x16_replace_lane(a, 15, b);
487 }
488
489 // CHECK-LABEL: @test_i16x8_splat(
490 // CHECK-NEXT: entry:
491 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0
492 // CHECK-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
493 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
494 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
495 //
test_i16x8_splat(int16_t a)496 v128_t test_i16x8_splat(int16_t a) {
497 return wasm_i16x8_splat(a);
498 }
499
500 // CHECK-LABEL: @test_i16x8_extract_lane(
501 // CHECK-NEXT: entry:
502 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
503 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7
504 // CHECK-NEXT: ret i16 [[VECEXT_I]]
505 //
test_i16x8_extract_lane(v128_t a)506 int16_t test_i16x8_extract_lane(v128_t a) {
507 return wasm_i16x8_extract_lane(a, 7);
508 }
509
510 // CHECK-LABEL: @test_u16x8_extract_lane(
511 // CHECK-NEXT: entry:
512 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
513 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7
514 // CHECK-NEXT: ret i16 [[VECEXT_I]]
515 //
test_u16x8_extract_lane(v128_t a)516 uint16_t test_u16x8_extract_lane(v128_t a) {
517 return wasm_u16x8_extract_lane(a, 7);
518 }
519
520 // CHECK-LABEL: @test_i16x8_replace_lane(
521 // CHECK-NEXT: entry:
522 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
523 // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B:%.*]], i32 7
524 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
525 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
526 //
test_i16x8_replace_lane(v128_t a,int16_t b)527 v128_t test_i16x8_replace_lane(v128_t a, int16_t b) {
528 return wasm_i16x8_replace_lane(a, 7, b);
529 }
530
531 // CHECK-LABEL: @test_i32x4_splat(
532 // CHECK-NEXT: entry:
533 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
534 // CHECK-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
535 // CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
536 //
test_i32x4_splat(int32_t a)537 v128_t test_i32x4_splat(int32_t a) {
538 return wasm_i32x4_splat(a);
539 }
540
541 // CHECK-LABEL: @test_i32x4_extract_lane(
542 // CHECK-NEXT: entry:
543 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 3
544 // CHECK-NEXT: ret i32 [[VECEXT_I]]
545 //
test_i32x4_extract_lane(v128_t a)546 int32_t test_i32x4_extract_lane(v128_t a) {
547 return wasm_i32x4_extract_lane(a, 3);
548 }
549
550 // CHECK-LABEL: @test_i32x4_replace_lane(
551 // CHECK-NEXT: entry:
552 // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[A:%.*]], i32 [[B:%.*]], i32 3
553 // CHECK-NEXT: ret <4 x i32> [[VECINS_I]]
554 //
test_i32x4_replace_lane(v128_t a,int32_t b)555 v128_t test_i32x4_replace_lane(v128_t a, int32_t b) {
556 return wasm_i32x4_replace_lane(a, 3, b);
557 }
558
559 // CHECK-LABEL: @test_i64x2_splat(
560 // CHECK-NEXT: entry:
561 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i32 0
562 // CHECK-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
563 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
564 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
565 //
test_i64x2_splat(int64_t a)566 v128_t test_i64x2_splat(int64_t a) {
567 return wasm_i64x2_splat(a);
568 }
569
570 // CHECK-LABEL: @test_i64x2_extract_lane(
571 // CHECK-NEXT: entry:
572 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
573 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
574 // CHECK-NEXT: ret i64 [[VECEXT_I]]
575 //
test_i64x2_extract_lane(v128_t a)576 int64_t test_i64x2_extract_lane(v128_t a) {
577 return wasm_i64x2_extract_lane(a, 1);
578 }
579
580 // CHECK-LABEL: @test_i64x2_replace_lane(
581 // CHECK-NEXT: entry:
582 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
583 // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i32 1
584 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
585 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
586 //
test_i64x2_replace_lane(v128_t a,int64_t b)587 v128_t test_i64x2_replace_lane(v128_t a, int64_t b) {
588 return wasm_i64x2_replace_lane(a, 1, b);
589 }
590
591 // CHECK-LABEL: @test_f32x4_splat(
592 // CHECK-NEXT: entry:
593 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
594 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT_I]] to <4 x i32>
595 // CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32> zeroinitializer
596 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
597 //
test_f32x4_splat(float a)598 v128_t test_f32x4_splat(float a) {
599 return wasm_f32x4_splat(a);
600 }
601
602 // CHECK-LABEL: @test_f32x4_extract_lane(
603 // CHECK-NEXT: entry:
604 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
605 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
606 // CHECK-NEXT: ret float [[VECEXT_I]]
607 //
test_f32x4_extract_lane(v128_t a)608 float test_f32x4_extract_lane(v128_t a) {
609 return wasm_f32x4_extract_lane(a, 3);
610 }
611
612 // CHECK-LABEL: @test_f32x4_replace_lane(
613 // CHECK-NEXT: entry:
614 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
615 // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP0]], float [[B:%.*]], i32 3
616 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[VECINS_I]] to <4 x i32>
617 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
618 //
test_f32x4_replace_lane(v128_t a,float b)619 v128_t test_f32x4_replace_lane(v128_t a, float b) {
620 return wasm_f32x4_replace_lane(a, 3, b);
621 }
622
623 // CHECK-LABEL: @test_f64x2_splat(
624 // CHECK-NEXT: entry:
625 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
626 // CHECK-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x double> [[VECINIT_I]], <2 x double> poison, <2 x i32> zeroinitializer
627 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32>
628 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
629 //
test_f64x2_splat(double a)630 v128_t test_f64x2_splat(double a) {
631 return wasm_f64x2_splat(a);
632 }
633
634 // CHECK-LABEL: @test_f64x2_extract_lane(
635 // CHECK-NEXT: entry:
636 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
637 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 1
638 // CHECK-NEXT: ret double [[VECEXT_I]]
639 //
test_f64x2_extract_lane(v128_t a)640 double test_f64x2_extract_lane(v128_t a) {
641 return wasm_f64x2_extract_lane(a, 1);
642 }
643
644 // CHECK-LABEL: @test_f64x2_replace_lane(
645 // CHECK-NEXT: entry:
646 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
647 // CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x double> [[TMP0]], double [[B:%.*]], i32 1
648 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[VECINS_I]] to <4 x i32>
649 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
650 //
test_f64x2_replace_lane(v128_t a,double b)651 v128_t test_f64x2_replace_lane(v128_t a, double b) {
652 return wasm_f64x2_replace_lane(a, 1, b);
653 }
654
655 // CHECK-LABEL: @test_i8x16_eq(
656 // CHECK-NEXT: entry:
657 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
658 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
659 // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]]
660 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
661 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
662 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
663 //
test_i8x16_eq(v128_t a,v128_t b)664 v128_t test_i8x16_eq(v128_t a, v128_t b) {
665 return wasm_i8x16_eq(a, b);
666 }
667
668 // CHECK-LABEL: @test_i8x16_ne(
669 // CHECK-NEXT: entry:
670 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
671 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
672 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <16 x i8> [[TMP0]], [[TMP1]]
673 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
674 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
675 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
676 //
test_i8x16_ne(v128_t a,v128_t b)677 v128_t test_i8x16_ne(v128_t a, v128_t b) {
678 return wasm_i8x16_ne(a, b);
679 }
680
681 // CHECK-LABEL: @test_i8x16_lt(
682 // CHECK-NEXT: entry:
683 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
684 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
685 // CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <16 x i8> [[TMP0]], [[TMP1]]
686 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
687 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
688 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
689 //
test_i8x16_lt(v128_t a,v128_t b)690 v128_t test_i8x16_lt(v128_t a, v128_t b) {
691 return wasm_i8x16_lt(a, b);
692 }
693
694 // CHECK-LABEL: @test_u8x16_lt(
695 // CHECK-NEXT: entry:
696 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
697 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
698 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <16 x i8> [[TMP0]], [[TMP1]]
699 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
700 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
701 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
702 //
test_u8x16_lt(v128_t a,v128_t b)703 v128_t test_u8x16_lt(v128_t a, v128_t b) {
704 return wasm_u8x16_lt(a, b);
705 }
706
707 // CHECK-LABEL: @test_i8x16_gt(
708 // CHECK-NEXT: entry:
709 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
710 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
711 // CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <16 x i8> [[TMP0]], [[TMP1]]
712 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
713 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
714 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
715 //
test_i8x16_gt(v128_t a,v128_t b)716 v128_t test_i8x16_gt(v128_t a, v128_t b) {
717 return wasm_i8x16_gt(a, b);
718 }
719
720 // CHECK-LABEL: @test_u8x16_gt(
721 // CHECK-NEXT: entry:
722 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
723 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
724 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <16 x i8> [[TMP0]], [[TMP1]]
725 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
726 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
727 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
728 //
test_u8x16_gt(v128_t a,v128_t b)729 v128_t test_u8x16_gt(v128_t a, v128_t b) {
730 return wasm_u8x16_gt(a, b);
731 }
732
733 // CHECK-LABEL: @test_i8x16_le(
734 // CHECK-NEXT: entry:
735 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
736 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
737 // CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <16 x i8> [[TMP0]], [[TMP1]]
738 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
739 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
740 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
741 //
test_i8x16_le(v128_t a,v128_t b)742 v128_t test_i8x16_le(v128_t a, v128_t b) {
743 return wasm_i8x16_le(a, b);
744 }
745
746 // CHECK-LABEL: @test_u8x16_le(
747 // CHECK-NEXT: entry:
748 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
749 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
750 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <16 x i8> [[TMP0]], [[TMP1]]
751 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
752 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
753 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
754 //
test_u8x16_le(v128_t a,v128_t b)755 v128_t test_u8x16_le(v128_t a, v128_t b) {
756 return wasm_u8x16_le(a, b);
757 }
758
759 // CHECK-LABEL: @test_i8x16_ge(
760 // CHECK-NEXT: entry:
761 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
762 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
763 // CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <16 x i8> [[TMP0]], [[TMP1]]
764 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
765 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
766 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
767 //
test_i8x16_ge(v128_t a,v128_t b)768 v128_t test_i8x16_ge(v128_t a, v128_t b) {
769 return wasm_i8x16_ge(a, b);
770 }
771
772 // CHECK-LABEL: @test_u8x16_ge(
773 // CHECK-NEXT: entry:
774 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
775 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
776 // CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <16 x i8> [[TMP0]], [[TMP1]]
777 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
778 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
779 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
780 //
test_u8x16_ge(v128_t a,v128_t b)781 v128_t test_u8x16_ge(v128_t a, v128_t b) {
782 return wasm_u8x16_ge(a, b);
783 }
784
785 // CHECK-LABEL: @test_i16x8_eq(
786 // CHECK-NEXT: entry:
787 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
788 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
789 // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]]
790 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
791 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
792 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
793 //
test_i16x8_eq(v128_t a,v128_t b)794 v128_t test_i16x8_eq(v128_t a, v128_t b) {
795 return wasm_i16x8_eq(a, b);
796 }
797
798 // CHECK-LABEL: @test_i16x8_ne(
799 // CHECK-NEXT: entry:
800 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
801 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
802 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <8 x i16> [[TMP0]], [[TMP1]]
803 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
804 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
805 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
806 //
test_i16x8_ne(v128_t a,v128_t b)807 v128_t test_i16x8_ne(v128_t a, v128_t b) {
808 return wasm_i16x8_ne(a, b);
809 }
810
811 // CHECK-LABEL: @test_i16x8_lt(
812 // CHECK-NEXT: entry:
813 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
814 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
815 // CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <8 x i16> [[TMP0]], [[TMP1]]
816 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
817 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
818 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
819 //
test_i16x8_lt(v128_t a,v128_t b)820 v128_t test_i16x8_lt(v128_t a, v128_t b) {
821 return wasm_i16x8_lt(a, b);
822 }
823
824 // CHECK-LABEL: @test_u16x8_lt(
825 // CHECK-NEXT: entry:
826 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
827 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
828 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <8 x i16> [[TMP0]], [[TMP1]]
829 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
830 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
831 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
832 //
test_u16x8_lt(v128_t a,v128_t b)833 v128_t test_u16x8_lt(v128_t a, v128_t b) {
834 return wasm_u16x8_lt(a, b);
835 }
836
837 // CHECK-LABEL: @test_i16x8_gt(
838 // CHECK-NEXT: entry:
839 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
840 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
841 // CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <8 x i16> [[TMP0]], [[TMP1]]
842 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
843 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
844 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
845 //
test_i16x8_gt(v128_t a,v128_t b)846 v128_t test_i16x8_gt(v128_t a, v128_t b) {
847 return wasm_i16x8_gt(a, b);
848 }
849
850 // CHECK-LABEL: @test_u16x8_gt(
851 // CHECK-NEXT: entry:
852 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
853 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
854 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <8 x i16> [[TMP0]], [[TMP1]]
855 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
856 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
857 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
858 //
test_u16x8_gt(v128_t a,v128_t b)859 v128_t test_u16x8_gt(v128_t a, v128_t b) {
860 return wasm_u16x8_gt(a, b);
861 }
862
863 // CHECK-LABEL: @test_i16x8_le(
864 // CHECK-NEXT: entry:
865 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
866 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
867 // CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <8 x i16> [[TMP0]], [[TMP1]]
868 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
869 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
870 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
871 //
test_i16x8_le(v128_t a,v128_t b)872 v128_t test_i16x8_le(v128_t a, v128_t b) {
873 return wasm_i16x8_le(a, b);
874 }
875
876 // CHECK-LABEL: @test_u16x8_le(
877 // CHECK-NEXT: entry:
878 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
879 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
880 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <8 x i16> [[TMP0]], [[TMP1]]
881 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
882 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
883 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
884 //
test_u16x8_le(v128_t a,v128_t b)885 v128_t test_u16x8_le(v128_t a, v128_t b) {
886 return wasm_u16x8_le(a, b);
887 }
888
889 // CHECK-LABEL: @test_i16x8_ge(
890 // CHECK-NEXT: entry:
891 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
892 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
893 // CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <8 x i16> [[TMP0]], [[TMP1]]
894 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
895 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
896 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
897 //
test_i16x8_ge(v128_t a,v128_t b)898 v128_t test_i16x8_ge(v128_t a, v128_t b) {
899 return wasm_i16x8_ge(a, b);
900 }
901
902 // CHECK-LABEL: @test_u16x8_ge(
903 // CHECK-NEXT: entry:
904 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
905 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
906 // CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <8 x i16> [[TMP0]], [[TMP1]]
907 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
908 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
909 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
910 //
test_u16x8_ge(v128_t a,v128_t b)911 v128_t test_u16x8_ge(v128_t a, v128_t b) {
912 return wasm_u16x8_ge(a, b);
913 }
914
915 // CHECK-LABEL: @test_i32x4_eq(
916 // CHECK-NEXT: entry:
917 // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
918 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
919 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
920 //
test_i32x4_eq(v128_t a,v128_t b)921 v128_t test_i32x4_eq(v128_t a, v128_t b) {
922 return wasm_i32x4_eq(a, b);
923 }
924
925 // CHECK-LABEL: @test_i32x4_ne(
926 // CHECK-NEXT: entry:
927 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[B:%.*]]
928 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
929 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
930 //
test_i32x4_ne(v128_t a,v128_t b)931 v128_t test_i32x4_ne(v128_t a, v128_t b) {
932 return wasm_i32x4_ne(a, b);
933 }
934
935 // CHECK-LABEL: @test_i32x4_lt(
936 // CHECK-NEXT: entry:
937 // CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]]
938 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
939 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
940 //
test_i32x4_lt(v128_t a,v128_t b)941 v128_t test_i32x4_lt(v128_t a, v128_t b) {
942 return wasm_i32x4_lt(a, b);
943 }
944
945 // CHECK-LABEL: @test_u32x4_lt(
946 // CHECK-NEXT: entry:
947 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <4 x i32> [[A:%.*]], [[B:%.*]]
948 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
949 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
950 //
test_u32x4_lt(v128_t a,v128_t b)951 v128_t test_u32x4_lt(v128_t a, v128_t b) {
952 return wasm_u32x4_lt(a, b);
953 }
954
955 // CHECK-LABEL: @test_i32x4_gt(
956 // CHECK-NEXT: entry:
957 // CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]]
958 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
959 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
960 //
test_i32x4_gt(v128_t a,v128_t b)961 v128_t test_i32x4_gt(v128_t a, v128_t b) {
962 return wasm_i32x4_gt(a, b);
963 }
964
965 // CHECK-LABEL: @test_u32x4_gt(
966 // CHECK-NEXT: entry:
967 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
968 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
969 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
970 //
test_u32x4_gt(v128_t a,v128_t b)971 v128_t test_u32x4_gt(v128_t a, v128_t b) {
972 return wasm_u32x4_gt(a, b);
973 }
974
975 // CHECK-LABEL: @test_i32x4_le(
976 // CHECK-NEXT: entry:
977 // CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <4 x i32> [[A:%.*]], [[B:%.*]]
978 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
979 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
980 //
test_i32x4_le(v128_t a,v128_t b)981 v128_t test_i32x4_le(v128_t a, v128_t b) {
982 return wasm_i32x4_le(a, b);
983 }
984
985 // CHECK-LABEL: @test_u32x4_le(
986 // CHECK-NEXT: entry:
987 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]]
988 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
989 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
990 //
test_u32x4_le(v128_t a,v128_t b)991 v128_t test_u32x4_le(v128_t a, v128_t b) {
992 return wasm_u32x4_le(a, b);
993 }
994
995 // CHECK-LABEL: @test_i32x4_ge(
996 // CHECK-NEXT: entry:
997 // CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]]
998 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
999 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
1000 //
test_i32x4_ge(v128_t a,v128_t b)1001 v128_t test_i32x4_ge(v128_t a, v128_t b) {
1002 return wasm_i32x4_ge(a, b);
1003 }
1004
1005 // CHECK-LABEL: @test_u32x4_ge(
1006 // CHECK-NEXT: entry:
1007 // CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[B:%.*]]
1008 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1009 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
1010 //
test_u32x4_ge(v128_t a,v128_t b)1011 v128_t test_u32x4_ge(v128_t a, v128_t b) {
1012 return wasm_u32x4_ge(a, b);
1013 }
1014
1015 // CHECK-LABEL: @test_i64x2_eq(
1016 // CHECK-NEXT: entry:
1017 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1018 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
1019 // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <2 x i64> [[TMP0]], [[TMP1]]
1020 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1021 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1022 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1023 //
test_i64x2_eq(v128_t a,v128_t b)1024 v128_t test_i64x2_eq(v128_t a, v128_t b) {
1025 return wasm_i64x2_eq(a, b);
1026 }
1027
1028 // CHECK-LABEL: @test_i64x2_ne(
1029 // CHECK-NEXT: entry:
1030 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1031 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
1032 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <2 x i64> [[TMP0]], [[TMP1]]
1033 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1034 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1035 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1036 //
test_i64x2_ne(v128_t a,v128_t b)1037 v128_t test_i64x2_ne(v128_t a, v128_t b) {
1038 return wasm_i64x2_ne(a, b);
1039 }
1040
1041 // CHECK-LABEL: @test_i64x2_lt(
1042 // CHECK-NEXT: entry:
1043 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1044 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
1045 // CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <2 x i64> [[TMP0]], [[TMP1]]
1046 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1047 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1048 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1049 //
test_i64x2_lt(v128_t a,v128_t b)1050 v128_t test_i64x2_lt(v128_t a, v128_t b) {
1051 return wasm_i64x2_lt(a, b);
1052 }
1053
1054 // CHECK-LABEL: @test_i64x2_gt(
1055 // CHECK-NEXT: entry:
1056 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1057 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
1058 // CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <2 x i64> [[TMP0]], [[TMP1]]
1059 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1060 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1061 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1062 //
test_i64x2_gt(v128_t a,v128_t b)1063 v128_t test_i64x2_gt(v128_t a, v128_t b) {
1064 return wasm_i64x2_gt(a, b);
1065 }
1066
1067 // CHECK-LABEL: @test_i64x2_le(
1068 // CHECK-NEXT: entry:
1069 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1070 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
1071 // CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <2 x i64> [[TMP0]], [[TMP1]]
1072 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1073 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1074 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1075 //
test_i64x2_le(v128_t a,v128_t b)1076 v128_t test_i64x2_le(v128_t a, v128_t b) {
1077 return wasm_i64x2_le(a, b);
1078 }
1079
1080 // CHECK-LABEL: @test_i64x2_ge(
1081 // CHECK-NEXT: entry:
1082 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1083 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
1084 // CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <2 x i64> [[TMP0]], [[TMP1]]
1085 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1086 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1087 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1088 //
test_i64x2_ge(v128_t a,v128_t b)1089 v128_t test_i64x2_ge(v128_t a, v128_t b) {
1090 return wasm_i64x2_ge(a, b);
1091 }
1092
1093 // CHECK-LABEL: @test_f32x4_eq(
1094 // CHECK-NEXT: entry:
1095 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
1096 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
1097 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <4 x float> [[TMP0]], [[TMP1]]
1098 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1099 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
1100 //
test_f32x4_eq(v128_t a,v128_t b)1101 v128_t test_f32x4_eq(v128_t a, v128_t b) {
1102 return wasm_f32x4_eq(a, b);
1103 }
1104
1105 // CHECK-LABEL: @test_f32x4_ne(
1106 // CHECK-NEXT: entry:
1107 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
1108 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
1109 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp une <4 x float> [[TMP0]], [[TMP1]]
1110 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1111 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
1112 //
test_f32x4_ne(v128_t a,v128_t b)1113 v128_t test_f32x4_ne(v128_t a, v128_t b) {
1114 return wasm_f32x4_ne(a, b);
1115 }
1116
1117 // CHECK-LABEL: @test_f32x4_lt(
1118 // CHECK-NEXT: entry:
1119 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
1120 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
1121 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
1122 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1123 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
1124 //
test_f32x4_lt(v128_t a,v128_t b)1125 v128_t test_f32x4_lt(v128_t a, v128_t b) {
1126 return wasm_f32x4_lt(a, b);
1127 }
1128
1129 // CHECK-LABEL: @test_f32x4_gt(
1130 // CHECK-NEXT: entry:
1131 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
1132 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
1133 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <4 x float> [[TMP0]], [[TMP1]]
1134 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1135 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
1136 //
test_f32x4_gt(v128_t a,v128_t b)1137 v128_t test_f32x4_gt(v128_t a, v128_t b) {
1138 return wasm_f32x4_gt(a, b);
1139 }
1140
1141 // CHECK-LABEL: @test_f32x4_le(
1142 // CHECK-NEXT: entry:
1143 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
1144 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
1145 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp ole <4 x float> [[TMP0]], [[TMP1]]
1146 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1147 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
1148 //
test_f32x4_le(v128_t a,v128_t b)1149 v128_t test_f32x4_le(v128_t a, v128_t b) {
1150 return wasm_f32x4_le(a, b);
1151 }
1152
1153 // CHECK-LABEL: @test_f32x4_ge(
1154 // CHECK-NEXT: entry:
1155 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
1156 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
1157 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <4 x float> [[TMP0]], [[TMP1]]
1158 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1159 // CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
1160 //
test_f32x4_ge(v128_t a,v128_t b)1161 v128_t test_f32x4_ge(v128_t a, v128_t b) {
1162 return wasm_f32x4_ge(a, b);
1163 }
1164
1165 // CHECK-LABEL: @test_f64x2_eq(
1166 // CHECK-NEXT: entry:
1167 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
1168 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
1169 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <2 x double> [[TMP0]], [[TMP1]]
1170 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1171 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1172 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1173 //
test_f64x2_eq(v128_t a,v128_t b)1174 v128_t test_f64x2_eq(v128_t a, v128_t b) {
1175 return wasm_f64x2_eq(a, b);
1176 }
1177
1178 // CHECK-LABEL: @test_f64x2_ne(
1179 // CHECK-NEXT: entry:
1180 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
1181 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
1182 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp une <2 x double> [[TMP0]], [[TMP1]]
1183 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1184 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1185 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1186 //
test_f64x2_ne(v128_t a,v128_t b)1187 v128_t test_f64x2_ne(v128_t a, v128_t b) {
1188 return wasm_f64x2_ne(a, b);
1189 }
1190
1191 // CHECK-LABEL: @test_f64x2_lt(
1192 // CHECK-NEXT: entry:
1193 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
1194 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
1195 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
1196 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1197 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1198 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1199 //
test_f64x2_lt(v128_t a,v128_t b)1200 v128_t test_f64x2_lt(v128_t a, v128_t b) {
1201 return wasm_f64x2_lt(a, b);
1202 }
1203
1204 // CHECK-LABEL: @test_f64x2_gt(
1205 // CHECK-NEXT: entry:
1206 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
1207 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
1208 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <2 x double> [[TMP0]], [[TMP1]]
1209 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1210 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1211 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1212 //
test_f64x2_gt(v128_t a,v128_t b)1213 v128_t test_f64x2_gt(v128_t a, v128_t b) {
1214 return wasm_f64x2_gt(a, b);
1215 }
1216
1217 // CHECK-LABEL: @test_f64x2_le(
1218 // CHECK-NEXT: entry:
1219 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
1220 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
1221 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp ole <2 x double> [[TMP0]], [[TMP1]]
1222 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1223 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1224 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1225 //
1226 v128_t test_f64x2_le(v128_t a, v128_t b) {
1227 return wasm_f64x2_le(a, b);
1228 }
1229
1230 // CHECK-LABEL: @test_f64x2_ge(
1231 // CHECK-NEXT: entry:
1232 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
1233 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
1234 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <2 x double> [[TMP0]], [[TMP1]]
1235 // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1236 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1237 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1238 //
1239 v128_t test_f64x2_ge(v128_t a, v128_t b) {
1240 return wasm_f64x2_ge(a, b);
1241 }
1242
1243 // CHECK-LABEL: @test_v128_not(
1244 // CHECK-NEXT: entry:
1245 // CHECK-NEXT: [[NEG_I:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
1246 // CHECK-NEXT: ret <4 x i32> [[NEG_I]]
1247 //
1248 v128_t test_v128_not(v128_t a) {
1249 return wasm_v128_not(a);
1250 }
1251
1252 // CHECK-LABEL: @test_v128_and(
1253 // CHECK-NEXT: entry:
1254 // CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[B:%.*]], [[A:%.*]]
1255 // CHECK-NEXT: ret <4 x i32> [[AND_I]]
1256 //
1257 v128_t test_v128_and(v128_t a, v128_t b) {
1258 return wasm_v128_and(a, b);
1259 }
1260
1261 // CHECK-LABEL: @test_v128_or(
1262 // CHECK-NEXT: entry:
1263 // CHECK-NEXT: [[OR_I:%.*]] = or <4 x i32> [[B:%.*]], [[A:%.*]]
1264 // CHECK-NEXT: ret <4 x i32> [[OR_I]]
1265 //
1266 v128_t test_v128_or(v128_t a, v128_t b) {
1267 return wasm_v128_or(a, b);
1268 }
1269
1270 // CHECK-LABEL: @test_v128_xor(
1271 // CHECK-NEXT: entry:
1272 // CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i32> [[B:%.*]], [[A:%.*]]
1273 // CHECK-NEXT: ret <4 x i32> [[XOR_I]]
1274 //
1275 v128_t test_v128_xor(v128_t a, v128_t b) {
1276 return wasm_v128_xor(a, b);
1277 }
1278
1279 // CHECK-LABEL: @test_v128_andnot(
1280 // CHECK-NEXT: entry:
1281 // CHECK-NEXT: [[NEG_I:%.*]] = xor <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
1282 // CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[NEG_I]], [[A:%.*]]
1283 // CHECK-NEXT: ret <4 x i32> [[AND_I]]
1284 //
1285 v128_t test_v128_andnot(v128_t a, v128_t b) {
1286 return wasm_v128_andnot(a, b);
1287 }
1288
1289 // CHECK-LABEL: @test_v128_any_true(
1290 // CHECK-NEXT: entry:
1291 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1292 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR6:[0-9]+]]
1293 // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
1294 // CHECK-NEXT: ret i1 [[TOBOOL_I]]
1295 //
1296 bool test_v128_any_true(v128_t a) {
1297 return wasm_v128_any_true(a);
1298 }
1299
1300 // CHECK-LABEL: @test_v128_bitselect(
1301 // CHECK-NEXT: entry:
1302 // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]]) #[[ATTR6]]
1303 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
1304 //
1305 v128_t test_v128_bitselect(v128_t a, v128_t b, v128_t mask) {
1306 return wasm_v128_bitselect(a, b, mask);
1307 }
1308
1309 // CHECK-LABEL: @test_i8x16_abs(
1310 // CHECK-NEXT: entry:
1311 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1312 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false) #[[ATTR6]]
1313 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1314 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1315 //
1316 v128_t test_i8x16_abs(v128_t a) {
1317 return wasm_i8x16_abs(a);
1318 }
1319
1320 // CHECK-LABEL: @test_i8x16_neg(
1321 // CHECK-NEXT: entry:
1322 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1323 // CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, [[TMP0]]
1324 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32>
1325 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
1326 //
1327 v128_t test_i8x16_neg(v128_t a) {
1328 return wasm_i8x16_neg(a);
1329 }
1330
1331 // CHECK-LABEL: @test_i8x16_all_true(
1332 // CHECK-NEXT: entry:
1333 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1334 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR6]]
1335 // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
1336 // CHECK-NEXT: ret i1 [[TOBOOL_I]]
1337 //
1338 bool test_i8x16_all_true(v128_t a) {
1339 return wasm_i8x16_all_true(a);
1340 }
1341
1342 // CHECK-LABEL: @test_i8x16_bitmask(
1343 // CHECK-NEXT: entry:
1344 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1345 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> [[TMP0]]) #[[ATTR6]]
1346 // CHECK-NEXT: ret i32 [[TMP1]]
1347 //
1348 int32_t test_i8x16_bitmask(v128_t a) {
1349 return wasm_i8x16_bitmask(a);
1350 }
1351
1352 // CHECK-LABEL: @test_i8x16_popcnt(
1353 // CHECK-NEXT: entry:
1354 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1355 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP0]]) #[[ATTR6]]
1356 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1357 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1358 //
1359 v128_t test_i8x16_popcnt(v128_t a) {
1360 return wasm_i8x16_popcnt(a);
1361 }
1362
1363 // CHECK-LABEL: @test_i8x16_shl(
1364 // CHECK-NEXT: entry:
1365 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1366 // CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
1367 // CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i32 0
1368 // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> undef, <16 x i32> zeroinitializer
1369 // CHECK-NEXT: [[SHL_I:%.*]] = shl <16 x i8> [[TMP0]], [[SH_PROM_I]]
1370 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[SHL_I]] to <4 x i32>
1371 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1372 //
1373 v128_t test_i8x16_shl(v128_t a, int32_t b) {
1374 return wasm_i8x16_shl(a, b);
1375 }
1376
1377 // CHECK-LABEL: @test_i8x16_shr(
1378 // CHECK-NEXT: entry:
1379 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1380 // CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
1381 // CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i32 0
1382 // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> undef, <16 x i32> zeroinitializer
1383 // CHECK-NEXT: [[SHR_I:%.*]] = ashr <16 x i8> [[TMP0]], [[SH_PROM_I]]
1384 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
1385 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1386 //
1387 v128_t test_i8x16_shr(v128_t a, int32_t b) {
1388 return wasm_i8x16_shr(a, b);
1389 }
1390
1391 // CHECK-LABEL: @test_u8x16_shr(
1392 // CHECK-NEXT: entry:
1393 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1394 // CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
1395 // CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i32 0
1396 // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> undef, <16 x i32> zeroinitializer
1397 // CHECK-NEXT: [[SHR_I:%.*]] = lshr <16 x i8> [[TMP0]], [[SH_PROM_I]]
1398 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
1399 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1400 //
1401 v128_t test_u8x16_shr(v128_t a, int32_t b) {
1402 return wasm_u8x16_shr(a, b);
1403 }
1404
1405 // CHECK-LABEL: @test_i8x16_add(
1406 // CHECK-NEXT: entry:
1407 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1408 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1409 // CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[TMP1]], [[TMP0]]
1410 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ADD_I]] to <4 x i32>
1411 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1412 //
1413 v128_t test_i8x16_add(v128_t a, v128_t b) {
1414 return wasm_i8x16_add(a, b);
1415 }
1416
1417 // CHECK-LABEL: @test_i8x16_add_sat(
1418 // CHECK-NEXT: entry:
1419 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1420 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1421 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
1422 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1423 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1424 //
1425 v128_t test_i8x16_add_sat(v128_t a, v128_t b) {
1426 return wasm_i8x16_add_sat(a, b);
1427 }
1428
1429 // CHECK-LABEL: @test_u8x16_add_sat(
1430 // CHECK-NEXT: entry:
1431 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1432 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1433 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
1434 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1435 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1436 //
1437 v128_t test_u8x16_add_sat(v128_t a, v128_t b) {
1438 return wasm_u8x16_add_sat(a, b);
1439 }
1440
1441 // CHECK-LABEL: @test_i8x16_sub(
1442 // CHECK-NEXT: entry:
1443 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1444 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1445 // CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> [[TMP0]], [[TMP1]]
1446 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32>
1447 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1448 //
1449 v128_t test_i8x16_sub(v128_t a, v128_t b) {
1450 return wasm_i8x16_sub(a, b);
1451 }
1452
1453 // CHECK-LABEL: @test_i8x16_sub_sat(
1454 // CHECK-NEXT: entry:
1455 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1456 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1457 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.signed.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
1458 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1459 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1460 //
1461 v128_t test_i8x16_sub_sat(v128_t a, v128_t b) {
1462 return wasm_i8x16_sub_sat(a, b);
1463 }
1464
1465 // CHECK-LABEL: @test_u8x16_sub_sat(
1466 // CHECK-NEXT: entry:
1467 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1468 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1469 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
1470 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1471 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1472 //
1473 v128_t test_u8x16_sub_sat(v128_t a, v128_t b) {
1474 return wasm_u8x16_sub_sat(a, b);
1475 }
1476
1477 // CHECK-LABEL: @test_i8x16_min(
1478 // CHECK-NEXT: entry:
1479 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1480 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1481 // CHECK-NEXT: [[TMP2:%.*]] = icmp slt <16 x i8> [[TMP0]], [[TMP1]]
1482 // CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
1483 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
1484 // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1485 //
1486 v128_t test_i8x16_min(v128_t a, v128_t b) {
1487 return wasm_i8x16_min(a, b);
1488 }
1489
1490 // CHECK-LABEL: @test_u8x16_min(
1491 // CHECK-NEXT: entry:
1492 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1493 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1494 // CHECK-NEXT: [[TMP2:%.*]] = icmp ult <16 x i8> [[TMP0]], [[TMP1]]
1495 // CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
1496 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
1497 // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1498 //
1499 v128_t test_u8x16_min(v128_t a, v128_t b) {
1500 return wasm_u8x16_min(a, b);
1501 }
1502
1503 // CHECK-LABEL: @test_i8x16_max(
1504 // CHECK-NEXT: entry:
1505 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1506 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1507 // CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <16 x i8> [[TMP0]], [[TMP1]]
1508 // CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
1509 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
1510 // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1511 //
1512 v128_t test_i8x16_max(v128_t a, v128_t b) {
1513 return wasm_i8x16_max(a, b);
1514 }
1515
1516 // CHECK-LABEL: @test_u8x16_max(
1517 // CHECK-NEXT: entry:
1518 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1519 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1520 // CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <16 x i8> [[TMP0]], [[TMP1]]
1521 // CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
1522 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
1523 // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1524 //
1525 v128_t test_u8x16_max(v128_t a, v128_t b) {
1526 return wasm_u8x16_max(a, b);
1527 }
1528
1529 // CHECK-LABEL: @test_u8x16_avgr(
1530 // CHECK-NEXT: entry:
1531 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1532 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1533 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
1534 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1535 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1536 //
1537 v128_t test_u8x16_avgr(v128_t a, v128_t b) {
1538 return wasm_u8x16_avgr(a, b);
1539 }
1540
1541 // CHECK-LABEL: @test_i16x8_abs(
1542 // CHECK-NEXT: entry:
1543 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1544 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false) #[[ATTR6]]
1545 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
1546 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1547 //
1548 v128_t test_i16x8_abs(v128_t a) {
1549 return wasm_i16x8_abs(a);
1550 }
1551
1552 // CHECK-LABEL: @test_i16x8_neg(
1553 // CHECK-NEXT: entry:
1554 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1555 // CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, [[TMP0]]
1556 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32>
1557 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
1558 //
1559 v128_t test_i16x8_neg(v128_t a) {
1560 return wasm_i16x8_neg(a);
1561 }
1562
1563 // CHECK-LABEL: @test_i16x8_all_true(
1564 // CHECK-NEXT: entry:
1565 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1566 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v8i16(<8 x i16> [[TMP0]]) #[[ATTR6]]
1567 // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
1568 // CHECK-NEXT: ret i1 [[TOBOOL_I]]
1569 //
1570 bool test_i16x8_all_true(v128_t a) {
1571 return wasm_i16x8_all_true(a);
1572 }
1573
1574 // CHECK-LABEL: @test_i16x8_bitmask(
1575 // CHECK-NEXT: entry:
1576 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1577 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> [[TMP0]]) #[[ATTR6]]
1578 // CHECK-NEXT: ret i32 [[TMP1]]
1579 //
1580 int32_t test_i16x8_bitmask(v128_t a) {
1581 return wasm_i16x8_bitmask(a);
1582 }
1583
1584 // CHECK-LABEL: @test_i16x8_shl(
1585 // CHECK-NEXT: entry:
1586 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1587 // CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
1588 // CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i32 0
1589 // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> zeroinitializer
1590 // CHECK-NEXT: [[SHL_I:%.*]] = shl <8 x i16> [[TMP0]], [[SH_PROM_I]]
1591 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[SHL_I]] to <4 x i32>
1592 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1593 //
1594 v128_t test_i16x8_shl(v128_t a, int32_t b) {
1595 return wasm_i16x8_shl(a, b);
1596 }
1597
1598 // CHECK-LABEL: @test_i16x8_shr(
1599 // CHECK-NEXT: entry:
1600 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1601 // CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
1602 // CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i32 0
1603 // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> zeroinitializer
1604 // CHECK-NEXT: [[SHR_I:%.*]] = ashr <8 x i16> [[TMP0]], [[SH_PROM_I]]
1605 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
1606 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1607 //
1608 v128_t test_i16x8_shr(v128_t a, int32_t b) {
1609 return wasm_i16x8_shr(a, b);
1610 }
1611
1612 // CHECK-LABEL: @test_u16x8_shr(
1613 // CHECK-NEXT: entry:
1614 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1615 // CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
1616 // CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i32 0
1617 // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> zeroinitializer
1618 // CHECK-NEXT: [[SHR_I:%.*]] = lshr <8 x i16> [[TMP0]], [[SH_PROM_I]]
1619 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
1620 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1621 //
1622 v128_t test_u16x8_shr(v128_t a, int32_t b) {
1623 return wasm_u16x8_shr(a, b);
1624 }
1625
1626 // CHECK-LABEL: @test_i16x8_add(
1627 // CHECK-NEXT: entry:
1628 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1629 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1630 // CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[TMP1]], [[TMP0]]
1631 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ADD_I]] to <4 x i32>
1632 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1633 //
1634 v128_t test_i16x8_add(v128_t a, v128_t b) {
1635 return wasm_i16x8_add(a, b);
1636 }
1637
1638 // CHECK-LABEL: @test_i16x8_add_sat(
1639 // CHECK-NEXT: entry:
1640 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1641 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1642 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
1643 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1644 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1645 //
1646 v128_t test_i16x8_add_sat(v128_t a, v128_t b) {
1647 return wasm_i16x8_add_sat(a, b);
1648 }
1649
1650 // CHECK-LABEL: @test_u16x8_add_sat(
1651 // CHECK-NEXT: entry:
1652 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1653 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1654 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
1655 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1656 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1657 //
1658 v128_t test_u16x8_add_sat(v128_t a, v128_t b) {
1659 return wasm_u16x8_add_sat(a, b);
1660 }
1661
1662 // CHECK-LABEL: @test_i16x8_sub(
1663 // CHECK-NEXT: entry:
1664 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1665 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1666 // CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
1667 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32>
1668 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1669 //
1670 v128_t test_i16x8_sub(v128_t a, v128_t b) {
1671 return wasm_i16x8_sub(a, b);
1672 }
1673
1674 // CHECK-LABEL: @test_i16x8_sub_sat(
1675 // CHECK-NEXT: entry:
1676 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1677 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1678 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.signed.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
1679 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1680 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1681 //
1682 v128_t test_i16x8_sub_sat(v128_t a, v128_t b) {
1683 return wasm_i16x8_sub_sat(a, b);
1684 }
1685
1686 // CHECK-LABEL: @test_u16x8_sub_sat(
1687 // CHECK-NEXT: entry:
1688 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1689 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1690 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
1691 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1692 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1693 //
1694 v128_t test_u16x8_sub_sat(v128_t a, v128_t b) {
1695 return wasm_u16x8_sub_sat(a, b);
1696 }
1697
1698 // CHECK-LABEL: @test_i16x8_mul(
1699 // CHECK-NEXT: entry:
1700 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1701 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1702 // CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[TMP1]], [[TMP0]]
1703 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
1704 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1705 //
1706 v128_t test_i16x8_mul(v128_t a, v128_t b) {
1707 return wasm_i16x8_mul(a, b);
1708 }
1709
1710 // CHECK-LABEL: @test_i16x8_min(
1711 // CHECK-NEXT: entry:
1712 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1713 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1714 // CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP0]], [[TMP1]]
1715 // CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
1716 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
1717 // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1718 //
1719 v128_t test_i16x8_min(v128_t a, v128_t b) {
1720 return wasm_i16x8_min(a, b);
1721 }
1722
1723 // CHECK-LABEL: @test_u16x8_min(
1724 // CHECK-NEXT: entry:
1725 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1726 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1727 // CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP0]], [[TMP1]]
1728 // CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
1729 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
1730 // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1731 //
1732 v128_t test_u16x8_min(v128_t a, v128_t b) {
1733 return wasm_u16x8_min(a, b);
1734 }
1735
1736 // CHECK-LABEL: @test_i16x8_max(
1737 // CHECK-NEXT: entry:
1738 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1739 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1740 // CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP0]], [[TMP1]]
1741 // CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
1742 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
1743 // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1744 //
1745 v128_t test_i16x8_max(v128_t a, v128_t b) {
1746 return wasm_i16x8_max(a, b);
1747 }
1748
1749 // CHECK-LABEL: @test_u16x8_max(
1750 // CHECK-NEXT: entry:
1751 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1752 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1753 // CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP0]], [[TMP1]]
1754 // CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
1755 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
1756 // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1757 //
1758 v128_t test_u16x8_max(v128_t a, v128_t b) {
1759 return wasm_u16x8_max(a, b);
1760 }
1761
1762 // CHECK-LABEL: @test_u16x8_avgr(
1763 // CHECK-NEXT: entry:
1764 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1765 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1766 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
1767 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1768 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1769 //
1770 v128_t test_u16x8_avgr(v128_t a, v128_t b) {
1771 return wasm_u16x8_avgr(a, b);
1772 }
1773
1774 // CHECK-LABEL: @test_i32x4_abs(
1775 // CHECK-NEXT: entry:
1776 // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false) #[[ATTR6]]
1777 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
1778 //
1779 v128_t test_i32x4_abs(v128_t a) {
1780 return wasm_i32x4_abs(a);
1781 }
1782
1783 // CHECK-LABEL: @test_i32x4_neg(
1784 // CHECK-NEXT: entry:
1785 // CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]]
1786 // CHECK-NEXT: ret <4 x i32> [[SUB_I]]
1787 //
1788 v128_t test_i32x4_neg(v128_t a) {
1789 return wasm_i32x4_neg(a);
1790 }
1791
1792 // CHECK-LABEL: @test_i32x4_all_true(
1793 // CHECK-NEXT: entry:
1794 // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR6]]
1795 // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0
1796 // CHECK-NEXT: ret i1 [[TOBOOL_I]]
1797 //
1798 bool test_i32x4_all_true(v128_t a) {
1799 return wasm_i32x4_all_true(a);
1800 }
1801
1802 // CHECK-LABEL: @test_i32x4_bitmask(
1803 // CHECK-NEXT: entry:
1804 // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR6]]
1805 // CHECK-NEXT: ret i32 [[TMP0]]
1806 //
1807 int32_t test_i32x4_bitmask(v128_t a) {
1808 return wasm_i32x4_bitmask(a);
1809 }
1810
1811 // CHECK-LABEL: @test_i32x4_shl(
1812 // CHECK-NEXT: entry:
1813 // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
1814 // CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
1815 // CHECK-NEXT: [[SHL_I:%.*]] = shl <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
1816 // CHECK-NEXT: ret <4 x i32> [[SHL_I]]
1817 //
1818 v128_t test_i32x4_shl(v128_t a, int32_t b) {
1819 return wasm_i32x4_shl(a, b);
1820 }
1821
1822 // CHECK-LABEL: @test_i32x4_shr(
1823 // CHECK-NEXT: entry:
1824 // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
1825 // CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
1826 // CHECK-NEXT: [[SHR_I:%.*]] = ashr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
1827 // CHECK-NEXT: ret <4 x i32> [[SHR_I]]
1828 //
1829 v128_t test_i32x4_shr(v128_t a, int32_t b) {
1830 return wasm_i32x4_shr(a, b);
1831 }
1832
1833 // CHECK-LABEL: @test_u32x4_shr(
1834 // CHECK-NEXT: entry:
1835 // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
1836 // CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
1837 // CHECK-NEXT: [[SHR_I:%.*]] = lshr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
1838 // CHECK-NEXT: ret <4 x i32> [[SHR_I]]
1839 //
1840 v128_t test_u32x4_shr(v128_t a, int32_t b) {
1841 return wasm_u32x4_shr(a, b);
1842 }
1843
1844 // CHECK-LABEL: @test_i32x4_add(
1845 // CHECK-NEXT: entry:
1846 // CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[B:%.*]], [[A:%.*]]
1847 // CHECK-NEXT: ret <4 x i32> [[ADD_I]]
1848 //
1849 v128_t test_i32x4_add(v128_t a, v128_t b) {
1850 return wasm_i32x4_add(a, b);
1851 }
1852
1853 // CHECK-LABEL: @test_i32x4_sub(
1854 // CHECK-NEXT: entry:
1855 // CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[B:%.*]]
1856 // CHECK-NEXT: ret <4 x i32> [[SUB_I]]
1857 //
1858 v128_t test_i32x4_sub(v128_t a, v128_t b) {
1859 return wasm_i32x4_sub(a, b);
1860 }
1861
1862 // CHECK-LABEL: @test_i32x4_mul(
1863 // CHECK-NEXT: entry:
1864 // CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B:%.*]], [[A:%.*]]
1865 // CHECK-NEXT: ret <4 x i32> [[MUL_I]]
1866 //
1867 v128_t test_i32x4_mul(v128_t a, v128_t b) {
1868 return wasm_i32x4_mul(a, b);
1869 }
1870
1871 // CHECK-LABEL: @test_i32x4_min(
1872 // CHECK-NEXT: entry:
1873 // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]]
1874 // CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
1875 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
1876 //
1877 v128_t test_i32x4_min(v128_t a, v128_t b) {
1878 return wasm_i32x4_min(a, b);
1879 }
1880
1881 // CHECK-LABEL: @test_u32x4_min(
1882 // CHECK-NEXT: entry:
1883 // CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[A:%.*]], [[B:%.*]]
1884 // CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
1885 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
1886 //
1887 v128_t test_u32x4_min(v128_t a, v128_t b) {
1888 return wasm_u32x4_min(a, b);
1889 }
1890
1891 // CHECK-LABEL: @test_i32x4_max(
1892 // CHECK-NEXT: entry:
1893 // CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]]
1894 // CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
1895 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
1896 //
1897 v128_t test_i32x4_max(v128_t a, v128_t b) {
1898 return wasm_i32x4_max(a, b);
1899 }
1900
1901 // CHECK-LABEL: @test_u32x4_max(
1902 // CHECK-NEXT: entry:
1903 // CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
1904 // CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
1905 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
1906 //
1907 v128_t test_u32x4_max(v128_t a, v128_t b) {
1908 return wasm_u32x4_max(a, b);
1909 }
1910
1911 // CHECK-LABEL: @test_i32x4_dot_i16x8(
1912 // CHECK-NEXT: entry:
1913 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1914 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1915 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.dot(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
1916 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1917 //
1918 v128_t test_i32x4_dot_i16x8(v128_t a, v128_t b) {
1919 return wasm_i32x4_dot_i16x8(a, b);
1920 }
1921
1922 // CHECK-LABEL: @test_i64x2_abs(
1923 // CHECK-NEXT: entry:
1924 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1925 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false) #[[ATTR6]]
1926 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
1927 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
1928 //
1929 v128_t test_i64x2_abs(v128_t a) {
1930 return wasm_i64x2_abs(a);
1931 }
1932
1933 // CHECK-LABEL: @test_i64x2_neg(
1934 // CHECK-NEXT: entry:
1935 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1936 // CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> zeroinitializer, [[TMP0]]
1937 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32>
1938 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
1939 //
1940 v128_t test_i64x2_neg(v128_t a) {
1941 return wasm_i64x2_neg(a);
1942 }
1943
1944 // CHECK-LABEL: @test_i64x2_all_true(
1945 // CHECK-NEXT: entry:
1946 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1947 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> [[TMP0]]) #[[ATTR6]]
1948 // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
1949 // CHECK-NEXT: ret i1 [[TOBOOL_I]]
1950 //
1951 bool test_i64x2_all_true(v128_t a) {
1952 return wasm_i64x2_all_true(a);
1953 }
1954
1955 // CHECK-LABEL: @test_i64x2_bitmask(
1956 // CHECK-NEXT: entry:
1957 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1958 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> [[TMP0]]) #[[ATTR6]]
1959 // CHECK-NEXT: ret i32 [[TMP1]]
1960 //
1961 int32_t test_i64x2_bitmask(v128_t a) {
1962 return wasm_i64x2_bitmask(a);
1963 }
1964
1965 // CHECK-LABEL: @test_i64x2_shl(
1966 // CHECK-NEXT: entry:
1967 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1968 // CHECK-NEXT: [[CONV_I:%.*]] = sext i32 [[B:%.*]] to i64
1969 // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i32 0
1970 // CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
1971 // CHECK-NEXT: [[SHL_I:%.*]] = shl <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
1972 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SHL_I]] to <4 x i32>
1973 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
1974 //
1975 v128_t test_i64x2_shl(v128_t a, int32_t b) {
1976 return wasm_i64x2_shl(a, b);
1977 }
1978
1979 // CHECK-LABEL: @test_i64x2_shr(
1980 // CHECK-NEXT: entry:
1981 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1982 // CHECK-NEXT: [[CONV_I:%.*]] = sext i32 [[B:%.*]] to i64
1983 // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i32 0
1984 // CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
1985 // CHECK-NEXT: [[SHR_I:%.*]] = ashr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
1986 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
1987 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
1988 //
1989 v128_t test_i64x2_shr(v128_t a, int32_t b) {
1990 return wasm_i64x2_shr(a, b);
1991 }
1992
1993 // CHECK-LABEL: @test_u64x2_shr(
1994 // CHECK-NEXT: entry:
1995 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1996 // CHECK-NEXT: [[CONV_I:%.*]] = sext i32 [[B:%.*]] to i64
1997 // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i32 0
1998 // CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
1999 // CHECK-NEXT: [[SHR_I:%.*]] = lshr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
2000 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
2001 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2002 //
2003 v128_t test_u64x2_shr(v128_t a, int32_t b) {
2004 return wasm_u64x2_shr(a, b);
2005 }
2006
2007 // CHECK-LABEL: @test_i64x2_add(
2008 // CHECK-NEXT: entry:
2009 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
2010 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
2011 // CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP0]]
2012 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[ADD_I]] to <4 x i32>
2013 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2014 //
2015 v128_t test_i64x2_add(v128_t a, v128_t b) {
2016 return wasm_i64x2_add(a, b);
2017 }
2018
2019 // CHECK-LABEL: @test_i64x2_sub(
2020 // CHECK-NEXT: entry:
2021 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
2022 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
2023 // CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[TMP0]], [[TMP1]]
2024 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32>
2025 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2026 //
2027 v128_t test_i64x2_sub(v128_t a, v128_t b) {
2028 return wasm_i64x2_sub(a, b);
2029 }
2030
2031 // CHECK-LABEL: @test_i64x2_mul(
2032 // CHECK-NEXT: entry:
2033 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
2034 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
2035 // CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i64> [[TMP1]], [[TMP0]]
2036 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
2037 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2038 //
2039 v128_t test_i64x2_mul(v128_t a, v128_t b) {
2040 return wasm_i64x2_mul(a, b);
2041 }
2042
2043 // CHECK-LABEL: @test_f32x4_abs(
2044 // CHECK-NEXT: entry:
2045 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2046 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2047 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
2048 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2049 //
2050 v128_t test_f32x4_abs(v128_t a) {
2051 return wasm_f32x4_abs(a);
2052 }
2053
2054 // CHECK-LABEL: @test_f32x4_neg(
2055 // CHECK-NEXT: entry:
2056 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2057 // CHECK-NEXT: [[FNEG_I:%.*]] = fneg <4 x float> [[TMP0]]
2058 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <4 x i32>
2059 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2060 //
2061 v128_t test_f32x4_neg(v128_t a) {
2062 return wasm_f32x4_neg(a);
2063 }
2064
2065 // CHECK-LABEL: @test_f32x4_sqrt(
2066 // CHECK-NEXT: entry:
2067 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2068 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2069 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
2070 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2071 //
2072 v128_t test_f32x4_sqrt(v128_t a) {
2073 return wasm_f32x4_sqrt(a);
2074 }
2075
2076 // CHECK-LABEL: @test_f32x4_ceil(
2077 // CHECK-NEXT: entry:
2078 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2079 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2080 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
2081 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2082 //
2083 v128_t test_f32x4_ceil(v128_t a) {
2084 return wasm_f32x4_ceil(a);
2085 }
2086
2087 // CHECK-LABEL: @test_f32x4_floor(
2088 // CHECK-NEXT: entry:
2089 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2090 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2091 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
2092 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2093 //
2094 v128_t test_f32x4_floor(v128_t a) {
2095 return wasm_f32x4_floor(a);
2096 }
2097
2098 // CHECK-LABEL: @test_f32x4_trunc(
2099 // CHECK-NEXT: entry:
2100 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2101 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2102 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
2103 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2104 //
2105 v128_t test_f32x4_trunc(v128_t a) {
2106 return wasm_f32x4_trunc(a);
2107 }
2108
2109 // CHECK-LABEL: @test_f32x4_nearest(
2110 // CHECK-NEXT: entry:
2111 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2112 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2113 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
2114 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2115 //
2116 v128_t test_f32x4_nearest(v128_t a) {
2117 return wasm_f32x4_nearest(a);
2118 }
2119
2120 // CHECK-LABEL: @test_f32x4_add(
2121 // CHECK-NEXT: entry:
2122 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2123 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2124 // CHECK-NEXT: [[ADD_I:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
2125 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[ADD_I]] to <4 x i32>
2126 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2127 //
2128 v128_t test_f32x4_add(v128_t a, v128_t b) {
2129 return wasm_f32x4_add(a, b);
2130 }
2131
2132 // CHECK-LABEL: @test_f32x4_sub(
2133 // CHECK-NEXT: entry:
2134 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2135 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2136 // CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[TMP0]], [[TMP1]]
2137 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[SUB_I]] to <4 x i32>
2138 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2139 //
2140 v128_t test_f32x4_sub(v128_t a, v128_t b) {
2141 return wasm_f32x4_sub(a, b);
2142 }
2143
2144 // CHECK-LABEL: @test_f32x4_mul(
2145 // CHECK-NEXT: entry:
2146 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2147 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2148 // CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
2149 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[MUL_I]] to <4 x i32>
2150 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2151 //
2152 v128_t test_f32x4_mul(v128_t a, v128_t b) {
2153 return wasm_f32x4_mul(a, b);
2154 }
2155
2156 // CHECK-LABEL: @test_f32x4_div(
2157 // CHECK-NEXT: entry:
2158 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2159 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2160 // CHECK-NEXT: [[DIV_I:%.*]] = fdiv <4 x float> [[TMP0]], [[TMP1]]
2161 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[DIV_I]] to <4 x i32>
2162 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2163 //
2164 v128_t test_f32x4_div(v128_t a, v128_t b) {
2165 return wasm_f32x4_div(a, b);
2166 }
2167
2168 // CHECK-LABEL: @test_f32x4_min(
2169 // CHECK-NEXT: entry:
2170 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2171 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2172 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR6]]
2173 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
2174 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2175 //
2176 v128_t test_f32x4_min(v128_t a, v128_t b) {
2177 return wasm_f32x4_min(a, b);
2178 }
2179
2180 // CHECK-LABEL: @test_f32x4_max(
2181 // CHECK-NEXT: entry:
2182 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2183 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2184 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR6]]
2185 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
2186 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2187 //
2188 v128_t test_f32x4_max(v128_t a, v128_t b) {
2189 return wasm_f32x4_max(a, b);
2190 }
2191
2192 // CHECK-LABEL: @test_f32x4_pmin(
2193 // CHECK-NEXT: entry:
2194 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2195 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2196 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
2197 // CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[CMP_I]], <4 x i32> [[B]], <4 x i32> [[A]]
2198 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2199 //
2200 v128_t test_f32x4_pmin(v128_t a, v128_t b) {
2201 return wasm_f32x4_pmin(a, b);
2202 }
2203
2204 // CHECK-LABEL: @test_f32x4_pmax(
2205 // CHECK-NEXT: entry:
2206 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2207 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2208 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
2209 // CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[CMP_I]], <4 x i32> [[B]], <4 x i32> [[A]]
2210 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2211 //
2212 v128_t test_f32x4_pmax(v128_t a, v128_t b) {
2213 return wasm_f32x4_pmax(a, b);
2214 }
2215
2216 // CHECK-LABEL: @test_f64x2_abs(
2217 // CHECK-NEXT: entry:
2218 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2219 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2220 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
2221 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2222 //
2223 v128_t test_f64x2_abs(v128_t a) {
2224 return wasm_f64x2_abs(a);
2225 }
2226
2227 // CHECK-LABEL: @test_f64x2_neg(
2228 // CHECK-NEXT: entry:
2229 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2230 // CHECK-NEXT: [[FNEG_I:%.*]] = fneg <2 x double> [[TMP0]]
2231 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG_I]] to <4 x i32>
2232 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2233 //
2234 v128_t test_f64x2_neg(v128_t a) {
2235 return wasm_f64x2_neg(a);
2236 }
2237
2238 // CHECK-LABEL: @test_f64x2_sqrt(
2239 // CHECK-NEXT: entry:
2240 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2241 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2242 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
2243 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2244 //
2245 v128_t test_f64x2_sqrt(v128_t a) {
2246 return wasm_f64x2_sqrt(a);
2247 }
2248
2249 // CHECK-LABEL: @test_f64x2_ceil(
2250 // CHECK-NEXT: entry:
2251 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2252 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2253 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
2254 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2255 //
2256 v128_t test_f64x2_ceil(v128_t a) {
2257 return wasm_f64x2_ceil(a);
2258 }
2259
2260 // CHECK-LABEL: @test_f64x2_floor(
2261 // CHECK-NEXT: entry:
2262 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2263 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2264 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
2265 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2266 //
2267 v128_t test_f64x2_floor(v128_t a) {
2268 return wasm_f64x2_floor(a);
2269 }
2270
2271 // CHECK-LABEL: @test_f64x2_trunc(
2272 // CHECK-NEXT: entry:
2273 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2274 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2275 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
2276 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2277 //
2278 v128_t test_f64x2_trunc(v128_t a) {
2279 return wasm_f64x2_trunc(a);
2280 }
2281
2282 // CHECK-LABEL: @test_f64x2_nearest(
2283 // CHECK-NEXT: entry:
2284 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2285 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2286 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
2287 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2288 //
2289 v128_t test_f64x2_nearest(v128_t a) {
2290 return wasm_f64x2_nearest(a);
2291 }
2292
2293 // CHECK-LABEL: @test_f64x2_add(
2294 // CHECK-NEXT: entry:
2295 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2296 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2297 // CHECK-NEXT: [[ADD_I:%.*]] = fadd <2 x double> [[TMP0]], [[TMP1]]
2298 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[ADD_I]] to <4 x i32>
2299 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2300 //
2301 v128_t test_f64x2_add(v128_t a, v128_t b) {
2302 return wasm_f64x2_add(a, b);
2303 }
2304
2305 // CHECK-LABEL: @test_f64x2_sub(
2306 // CHECK-NEXT: entry:
2307 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2308 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2309 // CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x double> [[TMP0]], [[TMP1]]
2310 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[SUB_I]] to <4 x i32>
2311 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2312 //
2313 v128_t test_f64x2_sub(v128_t a, v128_t b) {
2314 return wasm_f64x2_sub(a, b);
2315 }
2316
2317 // CHECK-LABEL: @test_f64x2_mul(
2318 // CHECK-NEXT: entry:
2319 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2320 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2321 // CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]]
2322 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[MUL_I]] to <4 x i32>
2323 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2324 //
2325 v128_t test_f64x2_mul(v128_t a, v128_t b) {
2326 return wasm_f64x2_mul(a, b);
2327 }
2328
2329 // CHECK-LABEL: @test_f64x2_div(
2330 // CHECK-NEXT: entry:
2331 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2332 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2333 // CHECK-NEXT: [[DIV_I:%.*]] = fdiv <2 x double> [[TMP0]], [[TMP1]]
2334 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[DIV_I]] to <4 x i32>
2335 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2336 //
2337 v128_t test_f64x2_div(v128_t a, v128_t b) {
2338 return wasm_f64x2_div(a, b);
2339 }
2340
2341 // CHECK-LABEL: @test_f64x2_min(
2342 // CHECK-NEXT: entry:
2343 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2344 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2345 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.minimum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR6]]
2346 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
2347 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2348 //
2349 v128_t test_f64x2_min(v128_t a, v128_t b) {
2350 return wasm_f64x2_min(a, b);
2351 }
2352
2353 // CHECK-LABEL: @test_f64x2_max(
2354 // CHECK-NEXT: entry:
2355 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2356 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2357 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR6]]
2358 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
2359 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2360 //
2361 v128_t test_f64x2_max(v128_t a, v128_t b) {
2362 return wasm_f64x2_max(a, b);
2363 }
2364
2365 // CHECK-LABEL: @test_f64x2_pmin(
2366 // CHECK-NEXT: entry:
2367 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2368 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2369 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
2370 // CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[CMP_I]], <2 x double> [[TMP0]], <2 x double> [[TMP1]]
2371 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
2372 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2373 //
2374 v128_t test_f64x2_pmin(v128_t a, v128_t b) {
2375 return wasm_f64x2_pmin(a, b);
2376 }
2377
2378 // CHECK-LABEL: @test_f64x2_pmax(
2379 // CHECK-NEXT: entry:
2380 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2381 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2382 // CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
2383 // CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[CMP_I]], <2 x double> [[TMP1]], <2 x double> [[TMP0]]
2384 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
2385 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2386 //
2387 v128_t test_f64x2_pmax(v128_t a, v128_t b) {
2388 return wasm_f64x2_pmax(a, b);
2389 }
2390
2391 // CHECK-LABEL: @test_i32x4_trunc_sat_f32x4(
2392 // CHECK-NEXT: entry:
2393 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2394 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2395 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2396 //
2397 v128_t test_i32x4_trunc_sat_f32x4(v128_t a) {
2398 return wasm_i32x4_trunc_sat_f32x4(a);
2399 }
2400
2401 // CHECK-LABEL: @test_u32x4_trunc_sat_f32x4(
2402 // CHECK-NEXT: entry:
2403 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2404 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2405 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2406 //
2407 v128_t test_u32x4_trunc_sat_f32x4(v128_t a) {
2408 return wasm_u32x4_trunc_sat_f32x4(a);
2409 }
2410
2411 // CHECK-LABEL: @test_f32x4_convert_i32x4(
2412 // CHECK-NEXT: entry:
2413 // CHECK-NEXT: [[CONV_I:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
2414 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
2415 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
2416 //
2417 v128_t test_f32x4_convert_i32x4(v128_t a) {
2418 return wasm_f32x4_convert_i32x4(a);
2419 }
2420
2421 // CHECK-LABEL: @test_f32x4_convert_u32x4(
2422 // CHECK-NEXT: entry:
2423 // CHECK-NEXT: [[CONV_I:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float>
2424 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
2425 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
2426 //
2427 v128_t test_f32x4_convert_u32x4(v128_t a) {
2428 return wasm_f32x4_convert_u32x4(a);
2429 }
2430
2431 // CHECK-LABEL: @test_f64x2_convert_low_i32x4(
2432 // CHECK-NEXT: entry:
2433 // CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2434 // CHECK-NEXT: [[CONV_I:%.*]] = sitofp <2 x i32> [[VECINIT2_I]] to <2 x double>
2435 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
2436 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
2437 //
2438 v128_t test_f64x2_convert_low_i32x4(v128_t a) {
2439 return wasm_f64x2_convert_low_i32x4(a);
2440 }
2441
2442 // CHECK-LABEL: @test_f64x2_convert_low_u32x4(
2443 // CHECK-NEXT: entry:
2444 // CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2445 // CHECK-NEXT: [[CONV_I:%.*]] = uitofp <2 x i32> [[VECINIT2_I]] to <2 x double>
2446 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
2447 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
2448 //
2449 v128_t test_f64x2_convert_low_u32x4(v128_t a) {
2450 return wasm_f64x2_convert_low_u32x4(a);
2451 }
2452
2453 // CHECK-LABEL: @test_i32x4_trunc_sat_f64x2_zero(
2454 // CHECK-NEXT: entry:
2455 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2456 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2457 // CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2458 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2459 //
2460 v128_t test_i32x4_trunc_sat_f64x2_zero(v128_t a) {
2461 return wasm_i32x4_trunc_sat_f64x2_zero(a);
2462 }
2463
2464 // CHECK-LABEL: @test_u32x4_trunc_sat_f64x2_zero(
2465 // CHECK-NEXT: entry:
2466 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2467 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2468 // CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2469 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2470 //
2471 v128_t test_u32x4_trunc_sat_f64x2_zero(v128_t a) {
2472 return wasm_u32x4_trunc_sat_f64x2_zero(a);
2473 }
2474
2475 // CHECK-LABEL: @test_f32x4_demote_f64x2_zero(
2476 // CHECK-NEXT: entry:
2477 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2478 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2479 // CHECK-NEXT: [[CONV_I:%.*]] = fptrunc <4 x double> [[SHUFFLE_I]] to <4 x float>
2480 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
2481 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2482 //
2483 v128_t test_f32x4_demote_f64x2_zero(v128_t a) {
2484 return wasm_f32x4_demote_f64x2_zero(a);
2485 }
2486
2487 // CHECK-LABEL: @test_f64x2_promote_low_f32x4(
2488 // CHECK-NEXT: entry:
2489 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2490 // CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
2491 // CHECK-NEXT: [[CONV_I:%.*]] = fpext <2 x float> [[VECINIT2_I]] to <2 x double>
2492 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
2493 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2494 //
2495 v128_t test_f64x2_promote_low_f32x4(v128_t a) {
2496 return wasm_f64x2_promote_low_f32x4(a);
2497 }
2498
2499 // CHECK-LABEL: @test_i8x16_shuffle(
2500 // CHECK-NEXT: entry:
2501 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2502 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2503 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0)
2504 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2505 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2506 //
2507 v128_t test_i8x16_shuffle(v128_t a, v128_t b) {
2508 return wasm_i8x16_shuffle(a, b, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
2509 }
2510
2511 // CHECK-LABEL: @test_i16x8_shuffle(
2512 // CHECK-NEXT: entry:
2513 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2514 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2515 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1)
2516 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2517 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2518 //
2519 v128_t test_i16x8_shuffle(v128_t a, v128_t b) {
2520 return wasm_i16x8_shuffle(a, b, 7, 6, 5, 4, 3, 2, 1, 0);
2521 }
2522
2523 // CHECK-LABEL: @test_i32x4_shuffle(
2524 // CHECK-NEXT: entry:
2525 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2526 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2527 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3)
2528 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2529 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2530 //
2531 v128_t test_i32x4_shuffle(v128_t a, v128_t b) {
2532 return wasm_i32x4_shuffle(a, b, 3, 2, 1, 0);
2533 }
2534
2535 // CHECK-LABEL: @test_i64x2_shuffle(
2536 // CHECK-NEXT: entry:
2537 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2538 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2539 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
2540 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2541 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2542 //
2543 v128_t test_i64x2_shuffle(v128_t a, v128_t b) {
2544 return wasm_i64x2_shuffle(a, b, 1, 0);
2545 }
2546
2547 // CHECK-LABEL: @test_i8x16_swizzle(
2548 // CHECK-NEXT: entry:
2549 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2550 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2551 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
2552 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2553 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2554 //
2555 v128_t test_i8x16_swizzle(v128_t a, v128_t b) {
2556 return wasm_i8x16_swizzle(a, b);
2557 }
2558
2559 // CHECK-LABEL: @test_i8x16_narrow_i16x8(
2560 // CHECK-NEXT: entry:
2561 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2562 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2563 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
2564 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2565 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2566 //
2567 v128_t test_i8x16_narrow_i16x8(v128_t a, v128_t b) {
2568 return wasm_i8x16_narrow_i16x8(a, b);
2569 }
2570
2571 // CHECK-LABEL: @test_u8x16_narrow_i16x8(
2572 // CHECK-NEXT: entry:
2573 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2574 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2575 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.unsigned.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
2576 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2577 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2578 //
2579 v128_t test_u8x16_narrow_i16x8(v128_t a, v128_t b) {
2580 return wasm_u8x16_narrow_i16x8(a, b);
2581 }
2582
2583 // CHECK-LABEL: @test_i16x8_narrow_i32x4(
2584 // CHECK-NEXT: entry:
2585 // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR6]]
2586 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
2587 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2588 //
2589 v128_t test_i16x8_narrow_i32x4(v128_t a, v128_t b) {
2590 return wasm_i16x8_narrow_i32x4(a, b);
2591 }
2592
2593 // CHECK-LABEL: @test_u16x8_narrow_i32x4(
2594 // CHECK-NEXT: entry:
2595 // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR6]]
2596 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
2597 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2598 //
2599 v128_t test_u16x8_narrow_i32x4(v128_t a, v128_t b) {
2600 return wasm_u16x8_narrow_i32x4(a, b);
2601 }
2602
2603 // CHECK-LABEL: @test_i16x8_extend_low_i8x16(
2604 // CHECK-NEXT: entry:
2605 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2606 // CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2607 // CHECK-NEXT: [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16>
2608 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
2609 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2610 //
2611 v128_t test_i16x8_extend_low_i8x16(v128_t a) {
2612 return wasm_i16x8_extend_low_i8x16(a);
2613 }
2614
2615 // CHECK-LABEL: @test_i16x8_extend_high_i8x16(
2616 // CHECK-NEXT: entry:
2617 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2618 // CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2619 // CHECK-NEXT: [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16>
2620 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
2621 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2622 //
2623 v128_t test_i16x8_extend_high_i8x16(v128_t a) {
2624 return wasm_i16x8_extend_high_i8x16(a);
2625 }
2626
2627 // CHECK-LABEL: @test_u16x8_extend_low_u8x16(
2628 // CHECK-NEXT: entry:
2629 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2630 // CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2631 // CHECK-NEXT: [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16>
2632 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
2633 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2634 //
2635 v128_t test_u16x8_extend_low_u8x16(v128_t a) {
2636 return wasm_u16x8_extend_low_u8x16(a);
2637 }
2638
2639 // CHECK-LABEL: @test_u16x8_extend_high_u8x16(
2640 // CHECK-NEXT: entry:
2641 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2642 // CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2643 // CHECK-NEXT: [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16>
2644 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
2645 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2646 //
2647 v128_t test_u16x8_extend_high_u8x16(v128_t a) {
2648 return wasm_u16x8_extend_high_u8x16(a);
2649 }
2650
2651 // CHECK-LABEL: @test_i32x4_extend_low_i16x8(
2652 // CHECK-NEXT: entry:
2653 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2654 // CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2655 // CHECK-NEXT: [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32>
2656 // CHECK-NEXT: ret <4 x i32> [[CONV_I]]
2657 //
2658 v128_t test_i32x4_extend_low_i16x8(v128_t a) {
2659 return wasm_i32x4_extend_low_i16x8(a);
2660 }
2661
2662 // CHECK-LABEL: @test_i32x4_extend_high_i16x8(
2663 // CHECK-NEXT: entry:
2664 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2665 // CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2666 // CHECK-NEXT: [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32>
2667 // CHECK-NEXT: ret <4 x i32> [[CONV_I]]
2668 //
2669 v128_t test_i32x4_extend_high_i16x8(v128_t a) {
2670 return wasm_i32x4_extend_high_i16x8(a);
2671 }
2672
2673 // CHECK-LABEL: @test_u32x4_extend_low_u16x8(
2674 // CHECK-NEXT: entry:
2675 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2676 // CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2677 // CHECK-NEXT: [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32>
2678 // CHECK-NEXT: ret <4 x i32> [[CONV_I]]
2679 //
2680 v128_t test_u32x4_extend_low_u16x8(v128_t a) {
2681 return wasm_u32x4_extend_low_u16x8(a);
2682 }
2683
2684 // CHECK-LABEL: @test_u32x4_extend_high_u16x8(
2685 // CHECK-NEXT: entry:
2686 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2687 // CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2688 // CHECK-NEXT: [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32>
2689 // CHECK-NEXT: ret <4 x i32> [[CONV_I]]
2690 //
2691 v128_t test_u32x4_extend_high_u16x8(v128_t a) {
2692 return wasm_u32x4_extend_high_u16x8(a);
2693 }
2694
2695 // CHECK-LABEL: @test_i64x2_extend_low_i32x4(
2696 // CHECK-NEXT: entry:
2697 // CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2698 // CHECK-NEXT: [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64>
2699 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
2700 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
2701 //
2702 v128_t test_i64x2_extend_low_i32x4(v128_t a) {
2703 return wasm_i64x2_extend_low_i32x4(a);
2704 }
2705
2706 // CHECK-LABEL: @test_i64x2_extend_high_i32x4(
2707 // CHECK-NEXT: entry:
2708 // CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2709 // CHECK-NEXT: [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64>
2710 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
2711 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
2712 //
2713 v128_t test_i64x2_extend_high_i32x4(v128_t a) {
2714 return wasm_i64x2_extend_high_i32x4(a);
2715 }
2716
2717 // CHECK-LABEL: @test_u64x2_extend_low_u32x4(
2718 // CHECK-NEXT: entry:
2719 // CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2720 // CHECK-NEXT: [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64>
2721 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
2722 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
2723 //
2724 v128_t test_u64x2_extend_low_u32x4(v128_t a) {
2725 return wasm_u64x2_extend_low_u32x4(a);
2726 }
2727
2728 // CHECK-LABEL: @test_u64x2_extend_high_u32x4(
2729 // CHECK-NEXT: entry:
2730 // CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2731 // CHECK-NEXT: [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64>
2732 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
2733 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
2734 //
2735 v128_t test_u64x2_extend_high_u32x4(v128_t a) {
2736 return wasm_u64x2_extend_high_u32x4(a);
2737 }
2738
2739 // CHECK-LABEL: @test_i16x8_extadd_pairwise_i8x16(
2740 // CHECK-NEXT: entry:
2741 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2742 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(<16 x i8> [[TMP0]]) #[[ATTR6]]
2743 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
2744 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2745 //
2746 v128_t test_i16x8_extadd_pairwise_i8x16(v128_t a) {
2747 return wasm_i16x8_extadd_pairwise_i8x16(a);
2748 }
2749
2750 // CHECK-LABEL: @test_u16x8_extadd_pairwise_u8x16(
2751 // CHECK-NEXT: entry:
2752 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2753 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.unsigned.v8i16(<16 x i8> [[TMP0]]) #[[ATTR6]]
2754 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
2755 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2756 //
2757 v128_t test_u16x8_extadd_pairwise_u8x16(v128_t a) {
2758 return wasm_u16x8_extadd_pairwise_u8x16(a);
2759 }
2760
2761 // CHECK-LABEL: @test_i32x4_extadd_pairwise_i16x8(
2762 // CHECK-NEXT: entry:
2763 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2764 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> [[TMP0]]) #[[ATTR6]]
2765 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2766 //
2767 v128_t test_i32x4_extadd_pairwise_i16x8(v128_t a) {
2768 return wasm_i32x4_extadd_pairwise_i16x8(a);
2769 }
2770
2771 // CHECK-LABEL: @test_u32x4_extadd_pairwise_u16x8(
2772 // CHECK-NEXT: entry:
2773 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2774 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.unsigned.v4i32(<8 x i16> [[TMP0]]) #[[ATTR6]]
2775 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2776 //
2777 v128_t test_u32x4_extadd_pairwise_u16x8(v128_t a) {
2778 return wasm_u32x4_extadd_pairwise_u16x8(a);
2779 }
2780
2781 // CHECK-LABEL: @test_i16x8_extmul_low_i8x16(
2782 // CHECK-NEXT: entry:
2783 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2784 // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2785 // CHECK-NEXT: [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
2786 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2787 // CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2788 // CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
2789 // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
2790 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
2791 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2792 //
2793 v128_t test_i16x8_extmul_low_i8x16(v128_t a, v128_t b) {
2794 return wasm_i16x8_extmul_low_i8x16(a, b);
2795 }
2796
2797 // CHECK-LABEL: @test_i16x8_extmul_high_i8x16(
2798 // CHECK-NEXT: entry:
2799 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2800 // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2801 // CHECK-NEXT: [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
2802 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2803 // CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2804 // CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
2805 // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
2806 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
2807 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2808 //
2809 v128_t test_i16x8_extmul_high_i8x16(v128_t a, v128_t b) {
2810 return wasm_i16x8_extmul_high_i8x16(a, b);
2811 }
2812
2813 // CHECK-LABEL: @test_u16x8_extmul_low_u8x16(
2814 // CHECK-NEXT: entry:
2815 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2816 // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2817 // CHECK-NEXT: [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
2818 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2819 // CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2820 // CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
2821 // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
2822 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
2823 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2824 //
2825 v128_t test_u16x8_extmul_low_u8x16(v128_t a, v128_t b) {
2826 return wasm_u16x8_extmul_low_u8x16(a, b);
2827 }
2828
2829 // CHECK-LABEL: @test_u16x8_extmul_high_u8x16(
2830 // CHECK-NEXT: entry:
2831 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2832 // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2833 // CHECK-NEXT: [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
2834 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2835 // CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2836 // CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
2837 // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
2838 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
2839 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2840 //
2841 v128_t test_u16x8_extmul_high_u8x16(v128_t a, v128_t b) {
2842 return wasm_u16x8_extmul_high_u8x16(a, b);
2843 }
2844
2845 // CHECK-LABEL: @test_i32x4_extmul_low_i16x8(
2846 // CHECK-NEXT: entry:
2847 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2848 // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2849 // CHECK-NEXT: [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
2850 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2851 // CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2852 // CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
2853 // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
2854 // CHECK-NEXT: ret <4 x i32> [[MUL_I]]
2855 //
2856 v128_t test_i32x4_extmul_low_i16x8(v128_t a, v128_t b) {
2857 return wasm_i32x4_extmul_low_i16x8(a, b);
2858 }
2859
2860 // CHECK-LABEL: @test_i32x4_extmul_high_i16x8(
2861 // CHECK-NEXT: entry:
2862 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2863 // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2864 // CHECK-NEXT: [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
2865 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2866 // CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2867 // CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
2868 // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
2869 // CHECK-NEXT: ret <4 x i32> [[MUL_I]]
2870 //
2871 v128_t test_i32x4_extmul_high_i16x8(v128_t a, v128_t b) {
2872 return wasm_i32x4_extmul_high_i16x8(a, b);
2873 }
2874
2875 // CHECK-LABEL: @test_u32x4_extmul_low_u16x8(
2876 // CHECK-NEXT: entry:
2877 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2878 // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2879 // CHECK-NEXT: [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
2880 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2881 // CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2882 // CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
2883 // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
2884 // CHECK-NEXT: ret <4 x i32> [[MUL_I]]
2885 //
2886 v128_t test_u32x4_extmul_low_u16x8(v128_t a, v128_t b) {
2887 return wasm_u32x4_extmul_low_u16x8(a, b);
2888 }
2889
2890 // CHECK-LABEL: @test_u32x4_extmul_high_u16x8(
2891 // CHECK-NEXT: entry:
2892 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2893 // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2894 // CHECK-NEXT: [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
2895 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2896 // CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2897 // CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
2898 // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
2899 // CHECK-NEXT: ret <4 x i32> [[MUL_I]]
2900 //
2901 v128_t test_u32x4_extmul_high_u16x8(v128_t a, v128_t b) {
2902 return wasm_u32x4_extmul_high_u16x8(a, b);
2903 }
2904
2905 // CHECK-LABEL: @test_i64x2_extmul_low_i32x4(
2906 // CHECK-NEXT: entry:
2907 // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2908 // CHECK-NEXT: [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
2909 // CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2910 // CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
2911 // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
2912 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
2913 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
2914 //
2915 v128_t test_i64x2_extmul_low_i32x4(v128_t a, v128_t b) {
2916 return wasm_i64x2_extmul_low_i32x4(a, b);
2917 }
2918
2919 // CHECK-LABEL: @test_i64x2_extmul_high_i32x4(
2920 // CHECK-NEXT: entry:
2921 // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2922 // CHECK-NEXT: [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
2923 // CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2924 // CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
2925 // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
2926 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
2927 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
2928 //
2929 v128_t test_i64x2_extmul_high_i32x4(v128_t a, v128_t b) {
2930 return wasm_i64x2_extmul_high_i32x4(a, b);
2931 }
2932
2933 // CHECK-LABEL: @test_u64x2_extmul_low_u32x4(
2934 // CHECK-NEXT: entry:
2935 // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2936 // CHECK-NEXT: [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
2937 // CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2938 // CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
2939 // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
2940 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
2941 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
2942 //
2943 v128_t test_u64x2_extmul_low_u32x4(v128_t a, v128_t b) {
2944 return wasm_u64x2_extmul_low_u32x4(a, b);
2945 }
2946
2947 // CHECK-LABEL: @test_u64x2_extmul_high_u32x4(
2948 // CHECK-NEXT: entry:
2949 // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2950 // CHECK-NEXT: [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
2951 // CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2952 // CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
2953 // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
2954 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
2955 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
2956 //
2957 v128_t test_u64x2_extmul_high_u32x4(v128_t a, v128_t b) {
2958 return wasm_u64x2_extmul_high_u32x4(a, b);
2959 }
2960
2961 // CHECK-LABEL: @test_i16x8_q15mulr_sat(
2962 // CHECK-NEXT: entry:
2963 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2964 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2965 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.q15mulr.sat.signed(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
2966 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
2967 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
2968 //
2969 v128_t test_i16x8_q15mulr_sat(v128_t a, v128_t b) {
2970 return wasm_i16x8_q15mulr_sat(a, b);
2971 }
2972