// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --force-update
// REQUIRES: webassembly-registered-target, asserts
// FIXME: This should not be using -O2 and implicitly testing the entire IR opt pipeline.
// RUN: %clang %s -O2 -emit-llvm -S -o - -target wasm32-unknown-unknown -msimd128 -Wcast-qual -fno-lax-vector-conversions -Werror | FileCheck %s
8 #include <wasm_simd128.h>
10 // CHECK-LABEL: @test_v128_load(
11 // CHECK-NEXT:  entry:
12 // CHECK-NEXT:    [[__V_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i32>*
13 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[__V_I]], align 1, !tbaa [[TBAA2:![0-9]+]]
14 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
15 //
test_v128_load(const void * mem)16 v128_t test_v128_load(const void *mem) {
17   return wasm_v128_load(mem);
18 }
20 // CHECK-LABEL: @test_v128_load8_splat(
21 // CHECK-NEXT:  entry:
22 // CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
23 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
24 // CHECK-NEXT:    [[VECINIT16_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer
25 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[VECINIT16_I]] to <4 x i32>
26 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
27 //
test_v128_load8_splat(const void * mem)28 v128_t test_v128_load8_splat(const void *mem) {
29   return wasm_v128_load8_splat(mem);
30 }
32 // CHECK-LABEL: @test_v128_load16_splat(
33 // CHECK-NEXT:  entry:
34 // CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i16*
35 // CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[__V1_I]], align 1, !tbaa [[TBAA2]]
36 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[TMP0]], i32 0
37 // CHECK-NEXT:    [[VECINIT8_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
38 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT8_I]] to <4 x i32>
39 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
40 //
test_v128_load16_splat(const void * mem)41 v128_t test_v128_load16_splat(const void *mem) {
42   return wasm_v128_load16_splat(mem);
43 }
45 // CHECK-LABEL: @test_v128_load32_splat(
46 // CHECK-NEXT:  entry:
47 // CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i32*
48 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[__V1_I]], align 1, !tbaa [[TBAA2]]
49 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[TMP0]], i32 0
50 // CHECK-NEXT:    [[VECINIT4_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
51 // CHECK-NEXT:    ret <4 x i32> [[VECINIT4_I]]
52 //
test_v128_load32_splat(const void * mem)53 v128_t test_v128_load32_splat(const void *mem) {
54   return wasm_v128_load32_splat(mem);
55 }
57 // CHECK-LABEL: @test_v128_load64_splat(
58 // CHECK-NEXT:  entry:
59 // CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i64*
60 // CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[__V1_I]], align 1, !tbaa [[TBAA2]]
61 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[TMP0]], i32 0
62 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
63 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32>
64 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
65 //
test_v128_load64_splat(const void * mem)66 v128_t test_v128_load64_splat(const void *mem) {
67   return wasm_v128_load64_splat(mem);
68 }
70 // CHECK-LABEL: @test_i16x8_load8x8(
71 // CHECK-NEXT:  entry:
72 // CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <8 x i8>*
73 // CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
74 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16>
75 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
76 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
77 //
test_i16x8_load8x8(const void * mem)78 v128_t test_i16x8_load8x8(const void *mem) {
79   return wasm_i16x8_load8x8(mem);
80 }
82 // CHECK-LABEL: @test_u16x8_load8x8(
83 // CHECK-NEXT:  entry:
84 // CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <8 x i8>*
85 // CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
86 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
87 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
88 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
89 //
test_u16x8_load8x8(const void * mem)90 v128_t test_u16x8_load8x8(const void *mem) {
91   return wasm_u16x8_load8x8(mem);
92 }
94 // CHECK-LABEL: @test_i32x4_load16x4(
95 // CHECK-NEXT:  entry:
96 // CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i16>*
97 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, <4 x i16>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
98 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
99 // CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
100 //
test_i32x4_load16x4(const void * mem)101 v128_t test_i32x4_load16x4(const void *mem) {
102   return wasm_i32x4_load16x4(mem);
103 }
105 // CHECK-LABEL: @test_u32x4_load16x4(
106 // CHECK-NEXT:  entry:
107 // CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i16>*
108 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, <4 x i16>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
109 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
110 // CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
111 //
test_u32x4_load16x4(const void * mem)112 v128_t test_u32x4_load16x4(const void *mem) {
113   return wasm_u32x4_load16x4(mem);
114 }
116 // CHECK-LABEL: @test_i64x2_load32x2(
117 // CHECK-NEXT:  entry:
118 // CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <2 x i32>*
119 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, <2 x i32>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
120 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <2 x i32> [[TMP0]] to <2 x i64>
121 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
122 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
123 //
test_i64x2_load32x2(const void * mem)124 v128_t test_i64x2_load32x2(const void *mem) {
125   return wasm_i64x2_load32x2(mem);
126 }
128 // CHECK-LABEL: @test_u64x2_load32x2(
129 // CHECK-NEXT:  entry:
130 // CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <2 x i32>*
131 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, <2 x i32>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
132 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <2 x i32> [[TMP0]] to <2 x i64>
133 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
134 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
135 //
test_u64x2_load32x2(const void * mem)136 v128_t test_u64x2_load32x2(const void *mem) {
137   return wasm_u64x2_load32x2(mem);
138 }
140 // CHECK-LABEL: @test_v128_load32_zero(
141 // CHECK-NEXT:  entry:
142 // CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i32*
143 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[__V1_I]], align 1, !tbaa [[TBAA2]]
144 // CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[TMP0]], i32 0
145 // CHECK-NEXT:    ret <4 x i32> [[VECINIT4_I]]
146 //
test_v128_load32_zero(const void * mem)147 v128_t test_v128_load32_zero(const void *mem) {
148   return wasm_v128_load32_zero(mem);
149 }
151 // CHECK-LABEL: @test_v128_load64_zero(
152 // CHECK-NEXT:  entry:
153 // CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i64*
154 // CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[__V1_I]], align 1, !tbaa [[TBAA2]]
155 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
156 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32>
157 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
158 //
test_v128_load64_zero(const void * mem)159 v128_t test_v128_load64_zero(const void *mem) {
160   return wasm_v128_load64_zero(mem);
161 }
163 // CHECK-LABEL: @test_v128_load8_lane(
164 // CHECK-NEXT:  entry:
165 // CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
166 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
167 // CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[TMP0]], i32 15
168 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
169 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
170 //
// Wraps wasm_v128_load8_lane with lane index 15; the expected IR above loads
// an i8 from ptr and inserts it into lane 15 of vec viewed as <16 x i8>.
v128_t test_v128_load8_lane(const uint8_t *ptr, v128_t vec) {
  return wasm_v128_load8_lane(ptr, vec, 15);
}
175 // CHECK-LABEL: @test_v128_load16_lane(
176 // CHECK-NEXT:  entry:
177 // CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
178 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
179 // CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP0]], i32 7
180 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
181 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
182 //
// Wraps wasm_v128_load16_lane with lane index 7; the expected IR above loads
// an i16 from ptr and inserts it into lane 7 of vec viewed as <8 x i16>.
v128_t test_v128_load16_lane(const uint16_t *ptr, v128_t vec) {
  return wasm_v128_load16_lane(ptr, vec, 7);
}
187 // CHECK-LABEL: @test_v128_load32_lane(
188 // CHECK-NEXT:  entry:
189 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
190 // CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <4 x i32> [[VEC:%.*]], i32 [[TMP0]], i32 3
191 // CHECK-NEXT:    ret <4 x i32> [[VECINS_I]]
192 //
// Wraps wasm_v128_load32_lane with lane index 3; the expected IR above loads
// an i32 from ptr and inserts it into lane 3 of vec.
v128_t test_v128_load32_lane(const uint32_t *ptr, v128_t vec) {
  return wasm_v128_load32_lane(ptr, vec, 3);
}
197 // CHECK-LABEL: @test_v128_load64_lane(
198 // CHECK-NEXT:  entry:
199 // CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
200 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
201 // CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP0]], i32 1
202 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
203 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
204 //
// Wraps wasm_v128_load64_lane with lane index 1; the expected IR above loads
// an i64 from ptr and inserts it into lane 1 of vec viewed as <2 x i64>.
v128_t test_v128_load64_lane(const uint64_t *ptr, v128_t vec) {
  return wasm_v128_load64_lane(ptr, vec, 1);
}
209 // CHECK-LABEL: @test_v128_store(
210 // CHECK-NEXT:  entry:
211 // CHECK-NEXT:    [[__V_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i32>*
212 // CHECK-NEXT:    store <4 x i32> [[A:%.*]], <4 x i32>* [[__V_I]], align 1, !tbaa [[TBAA2]]
213 // CHECK-NEXT:    ret void
214 //
// Wraps wasm_v128_store; the expected IR above is one align-1 <4 x i32> store
// of a to mem. The call is a plain statement because a void function must not
// `return` an expression (C11 6.8.6.4).
void test_v128_store(void *mem, v128_t a) {
  wasm_v128_store(mem, a);
}
219 // CHECK-LABEL: @test_v128_store8_lane(
220 // CHECK-NEXT:  entry:
221 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
222 // CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i32 15
223 // CHECK-NEXT:    store i8 [[VECEXT_I]], i8* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
224 // CHECK-NEXT:    ret void
225 //
// Wraps wasm_v128_store8_lane with lane index 15; the expected IR above
// extracts lane 15 of vec viewed as <16 x i8> and stores it to ptr. The call
// is a plain statement because a void function must not `return` an
// expression (C11 6.8.6.4).
void test_v128_store8_lane(uint8_t *ptr, v128_t vec) {
  wasm_v128_store8_lane(ptr, vec, 15);
}
230 // CHECK-LABEL: @test_v128_store16_lane(
231 // CHECK-NEXT:  entry:
232 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
233 // CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7
234 // CHECK-NEXT:    store i16 [[VECEXT_I]], i16* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
235 // CHECK-NEXT:    ret void
236 //
// Wraps wasm_v128_store16_lane with lane index 7; the expected IR above
// extracts lane 7 of vec viewed as <8 x i16> and stores it to ptr. The call
// is a plain statement because a void function must not `return` an
// expression (C11 6.8.6.4).
void test_v128_store16_lane(uint16_t *ptr, v128_t vec) {
  wasm_v128_store16_lane(ptr, vec, 7);
}
241 // CHECK-LABEL: @test_v128_store32_lane(
242 // CHECK-NEXT:  entry:
243 // CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x i32> [[VEC:%.*]], i32 3
244 // CHECK-NEXT:    store i32 [[VECEXT_I]], i32* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
245 // CHECK-NEXT:    ret void
246 //
// Wraps wasm_v128_store32_lane with lane index 3; the expected IR above
// extracts lane 3 of vec and stores it to ptr. The call is a plain statement
// because a void function must not `return` an expression (C11 6.8.6.4).
void test_v128_store32_lane(uint32_t *ptr, v128_t vec) {
  wasm_v128_store32_lane(ptr, vec, 3);
}
251 // CHECK-LABEL: @test_v128_store64_lane(
252 // CHECK-NEXT:  entry:
253 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
254 // CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
255 // CHECK-NEXT:    store i64 [[VECEXT_I]], i64* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
256 // CHECK-NEXT:    ret void
257 //
// Wraps wasm_v128_store64_lane with lane index 1; the expected IR above
// extracts lane 1 of vec viewed as <2 x i64> and stores it to ptr. The call
// is a plain statement because a void function must not `return` an
// expression (C11 6.8.6.4).
void test_v128_store64_lane(uint64_t *ptr, v128_t vec) {
  wasm_v128_store64_lane(ptr, vec, 1);
}
262 // CHECK-LABEL: @test_i8x16_make(
263 // CHECK-NEXT:  entry:
264 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[C0:%.*]], i32 0
265 // CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1:%.*]], i32 1
266 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2:%.*]], i32 2
267 // CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3:%.*]], i32 3
268 // CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4:%.*]], i32 4
269 // CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5:%.*]], i32 5
270 // CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6:%.*]], i32 6
271 // CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7:%.*]], i32 7
272 // CHECK-NEXT:    [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8:%.*]], i32 8
273 // CHECK-NEXT:    [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9:%.*]], i32 9
274 // CHECK-NEXT:    [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10:%.*]], i32 10
275 // CHECK-NEXT:    [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11:%.*]], i32 11
276 // CHECK-NEXT:    [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12:%.*]], i32 12
277 // CHECK-NEXT:    [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13:%.*]], i32 13
278 // CHECK-NEXT:    [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14:%.*]], i32 14
279 // CHECK-NEXT:    [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15:%.*]], i32 15
280 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
281 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
282 //
test_i8x16_make(int8_t c0,int8_t c1,int8_t c2,int8_t c3,int8_t c4,int8_t c5,int8_t c6,int8_t c7,int8_t c8,int8_t c9,int8_t c10,int8_t c11,int8_t c12,int8_t c13,int8_t c14,int8_t c15)283 v128_t test_i8x16_make(int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, int8_t c6, int8_t c7, int8_t c8, int8_t c9, int8_t c10, int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) {
284   return wasm_i8x16_make(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15);
285 }
287 // CHECK-LABEL: @test_i16x8_make(
288 // CHECK-NEXT:  entry:
289 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[C0:%.*]], i32 0
290 // CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1:%.*]], i32 1
291 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2:%.*]], i32 2
292 // CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3:%.*]], i32 3
293 // CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4:%.*]], i32 4
294 // CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5:%.*]], i32 5
295 // CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6:%.*]], i32 6
296 // CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7:%.*]], i32 7
297 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
298 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
299 //
test_i16x8_make(int16_t c0,int16_t c1,int16_t c2,int16_t c3,int16_t c4,int16_t c5,int16_t c6,int16_t c7)300 v128_t test_i16x8_make(int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c4, int16_t c5, int16_t c6, int16_t c7) {
301   return wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7);
302 }
304 // CHECK-LABEL: @test_i32x4_make(
305 // CHECK-NEXT:  entry:
306 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[C0:%.*]], i32 0
307 // CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1:%.*]], i32 1
308 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2:%.*]], i32 2
309 // CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3:%.*]], i32 3
310 // CHECK-NEXT:    ret <4 x i32> [[VECINIT3_I]]
311 //
test_i32x4_make(int32_t c0,int32_t c1,int32_t c2,int32_t c3)312 v128_t test_i32x4_make(int32_t c0, int32_t c1, int32_t c2, int32_t c3) {
313   return wasm_i32x4_make(c0, c1, c2, c3);
314 }
316 // CHECK-LABEL: @test_i64x2_make(
317 // CHECK-NEXT:  entry:
318 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[C0:%.*]], i32 0
319 // CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1:%.*]], i32 1
320 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
321 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
322 //
test_i64x2_make(int64_t c0,int64_t c1)323 v128_t test_i64x2_make(int64_t c0, int64_t c1) {
324   return wasm_i64x2_make(c0, c1);
325 }
327 // CHECK-LABEL: @test_f32x4_make(
328 // CHECK-NEXT:  entry:
329 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[C0:%.*]], i32 0
330 // CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[C1:%.*]], i32 1
331 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[C2:%.*]], i32 2
332 // CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[C3:%.*]], i32 3
333 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <4 x i32>
334 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
335 //
test_f32x4_make(float c0,float c1,float c2,float c3)336 v128_t test_f32x4_make(float c0, float c1, float c2, float c3) {
337   return wasm_f32x4_make(c0, c1, c2, c3);
338 }
340 // CHECK-LABEL: @test_f64x2_make(
341 // CHECK-NEXT:  entry:
342 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double [[C0:%.*]], i32 0
343 // CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[C1:%.*]], i32 1
344 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32>
345 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
346 //
test_f64x2_make(double c0,double c1)347 v128_t test_f64x2_make(double c0, double c1) {
348   return wasm_f64x2_make(c0, c1);
349 }
351 // CHECK-LABEL: @test_i8x16_const(
352 // CHECK-NEXT:  entry:
353 // CHECK-NEXT:    ret <4 x i32> <i32 50462976, i32 117835012, i32 185207048, i32 252579084>
354 //
test_i8x16_const()355 v128_t test_i8x16_const() {
356   return wasm_i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
357 }
359 // CHECK-LABEL: @test_i16x8_const(
360 // CHECK-NEXT:  entry:
361 // CHECK-NEXT:    ret <4 x i32> <i32 65536, i32 196610, i32 327684, i32 458758>
362 //
test_i16x8_const()363 v128_t test_i16x8_const() {
364   return wasm_i16x8_const(0, 1, 2, 3, 4, 5, 6, 7);
365 }
367 // CHECK-LABEL: @test_i32x4_const(
368 // CHECK-NEXT:  entry:
369 // CHECK-NEXT:    ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
370 //
test_i32x4_const()371 v128_t test_i32x4_const() {
372   return wasm_i32x4_const(0, 1, 2, 3);
373 }
375 // CHECK-LABEL: @test_i64x2_const(
376 // CHECK-NEXT:  entry:
377 // CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 1, i32 0>
378 //
test_i64x2_const()379 v128_t test_i64x2_const() {
380   return wasm_i64x2_const(0, 1);
381 }
383 // CHECK-LABEL: @test_f32x4_const(
384 // CHECK-NEXT:  entry:
385 // CHECK-NEXT:    ret <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>
386 //
test_f32x4_const()387 v128_t test_f32x4_const() {
388   return wasm_f32x4_const(0, 1, 2, 3);
389 }
391 // CHECK-LABEL: @test_f64x2_const(
392 // CHECK-NEXT:  entry:
393 // CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 1072693248>
394 //
test_f64x2_const()395 v128_t test_f64x2_const() {
396   return wasm_f64x2_const(0, 1);
397 }
399 // CHECK-LABEL: @test_i8x16_const_splat(
400 // CHECK-NEXT:  entry:
401 // CHECK-NEXT:    ret <4 x i32> <i32 707406378, i32 707406378, i32 707406378, i32 707406378>
402 //
test_i8x16_const_splat()403 v128_t test_i8x16_const_splat() {
404   return wasm_i8x16_const_splat(42);
405 }
407 // CHECK-LABEL: @test_i16x8_const_splat(
408 // CHECK-NEXT:  entry:
409 // CHECK-NEXT:    ret <4 x i32> <i32 2752554, i32 2752554, i32 2752554, i32 2752554>
410 //
test_i16x8_const_splat()411 v128_t test_i16x8_const_splat() {
412   return wasm_i16x8_const_splat(42);
413 }
415 // CHECK-LABEL: @test_i32x4_const_splat(
416 // CHECK-NEXT:  entry:
417 // CHECK-NEXT:    ret <4 x i32> <i32 42, i32 42, i32 42, i32 42>
418 //
test_i32x4_const_splat()419 v128_t test_i32x4_const_splat() {
420   return wasm_i32x4_const_splat(42);
421 }
423 // CHECK-LABEL: @test_i64x2_const_splat(
424 // CHECK-NEXT:  entry:
425 // CHECK-NEXT:    ret <4 x i32> <i32 42, i32 0, i32 42, i32 0>
426 //
test_i64x2_const_splat()427 v128_t test_i64x2_const_splat() {
428   return wasm_i64x2_const_splat(42);
429 }
431 // CHECK-LABEL: @test_f32x4_const_splat(
432 // CHECK-NEXT:  entry:
433 // CHECK-NEXT:    ret <4 x i32> <i32 1109917696, i32 1109917696, i32 1109917696, i32 1109917696>
434 //
test_f32x4_const_splat()435 v128_t test_f32x4_const_splat() {
436   return wasm_f32x4_const_splat(42);
437 }
439 // CHECK-LABEL: @test_f64x2_const_splat(
440 // CHECK-NEXT:  entry:
441 // CHECK-NEXT:    ret <4 x i32> <i32 0, i32 1078263808, i32 0, i32 1078263808>
442 //
test_f64x2_const_splat()443 v128_t test_f64x2_const_splat() {
444   return wasm_f64x2_const_splat(42);
445 }
447 // CHECK-LABEL: @test_i8x16_splat(
448 // CHECK-NEXT:  entry:
449 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0
450 // CHECK-NEXT:    [[VECINIT15_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer
451 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
452 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
453 //
test_i8x16_splat(int8_t a)454 v128_t test_i8x16_splat(int8_t a) {
455   return wasm_i8x16_splat(a);
456 }
458 // CHECK-LABEL: @test_i8x16_extract_lane(
459 // CHECK-NEXT:  entry:
460 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
461 // CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i32 15
462 // CHECK-NEXT:    ret i8 [[VECEXT_I]]
463 //
// Wraps wasm_i8x16_extract_lane with lane index 15; the expected IR above
// extracts lane 15 of a viewed as <16 x i8>.
int8_t test_i8x16_extract_lane(v128_t a) {
  return wasm_i8x16_extract_lane(a, 15);
}
468 // CHECK-LABEL: @test_u8x16_extract_lane(
469 // CHECK-NEXT:  entry:
470 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
471 // CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i32 15
472 // CHECK-NEXT:    ret i8 [[VECEXT_I]]
473 //
// Wraps wasm_u8x16_extract_lane with lane index 15; the expected IR above
// extracts lane 15 of a viewed as <16 x i8>.
uint8_t test_u8x16_extract_lane(v128_t a) {
  return wasm_u8x16_extract_lane(a, 15);
}
478 // CHECK-LABEL: @test_i8x16_replace_lane(
479 // CHECK-NEXT:  entry:
480 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
481 // CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B:%.*]], i32 15
482 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
483 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
484 //
// Wraps wasm_i8x16_replace_lane with lane index 15; the expected IR above
// inserts b into lane 15 of a viewed as <16 x i8>.
v128_t test_i8x16_replace_lane(v128_t a, int8_t b) {
  return wasm_i8x16_replace_lane(a, 15, b);
}
489 // CHECK-LABEL: @test_i16x8_splat(
490 // CHECK-NEXT:  entry:
491 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0
492 // CHECK-NEXT:    [[VECINIT7_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
493 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
494 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
495 //
test_i16x8_splat(int16_t a)496 v128_t test_i16x8_splat(int16_t a) {
497   return wasm_i16x8_splat(a);
498 }
500 // CHECK-LABEL: @test_i16x8_extract_lane(
501 // CHECK-NEXT:  entry:
502 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
503 // CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7
504 // CHECK-NEXT:    ret i16 [[VECEXT_I]]
505 //
// Wraps wasm_i16x8_extract_lane with lane index 7; the expected IR above
// extracts lane 7 of a viewed as <8 x i16>.
int16_t test_i16x8_extract_lane(v128_t a) {
  return wasm_i16x8_extract_lane(a, 7);
}
510 // CHECK-LABEL: @test_u16x8_extract_lane(
511 // CHECK-NEXT:  entry:
512 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
513 // CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7
514 // CHECK-NEXT:    ret i16 [[VECEXT_I]]
515 //
// Wraps wasm_u16x8_extract_lane with lane index 7; the expected IR above
// extracts lane 7 of a viewed as <8 x i16>.
uint16_t test_u16x8_extract_lane(v128_t a) {
  return wasm_u16x8_extract_lane(a, 7);
}
520 // CHECK-LABEL: @test_i16x8_replace_lane(
521 // CHECK-NEXT:  entry:
522 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
523 // CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B:%.*]], i32 7
524 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
525 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
526 //
// Wraps wasm_i16x8_replace_lane with lane index 7; the expected IR above
// inserts b into lane 7 of a viewed as <8 x i16>.
v128_t test_i16x8_replace_lane(v128_t a, int16_t b) {
  return wasm_i16x8_replace_lane(a, 7, b);
}
531 // CHECK-LABEL: @test_i32x4_splat(
532 // CHECK-NEXT:  entry:
533 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
534 // CHECK-NEXT:    [[VECINIT3_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
535 // CHECK-NEXT:    ret <4 x i32> [[VECINIT3_I]]
536 //
test_i32x4_splat(int32_t a)537 v128_t test_i32x4_splat(int32_t a) {
538   return wasm_i32x4_splat(a);
539 }
541 // CHECK-LABEL: @test_i32x4_extract_lane(
542 // CHECK-NEXT:  entry:
543 // CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 3
544 // CHECK-NEXT:    ret i32 [[VECEXT_I]]
545 //
// Wraps wasm_i32x4_extract_lane with lane index 3; the expected IR above is a
// single extractelement of lane 3 from a.
int32_t test_i32x4_extract_lane(v128_t a) {
  return wasm_i32x4_extract_lane(a, 3);
}
550 // CHECK-LABEL: @test_i32x4_replace_lane(
551 // CHECK-NEXT:  entry:
552 // CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <4 x i32> [[A:%.*]], i32 [[B:%.*]], i32 3
553 // CHECK-NEXT:    ret <4 x i32> [[VECINS_I]]
554 //
// Wraps wasm_i32x4_replace_lane with lane index 3; the expected IR above is a
// single insertelement of b into lane 3 of a.
v128_t test_i32x4_replace_lane(v128_t a, int32_t b) {
  return wasm_i32x4_replace_lane(a, 3, b);
}
559 // CHECK-LABEL: @test_i64x2_splat(
560 // CHECK-NEXT:  entry:
561 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i32 0
562 // CHECK-NEXT:    [[VECINIT1_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
563 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
564 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
565 //
test_i64x2_splat(int64_t a)566 v128_t test_i64x2_splat(int64_t a) {
567   return wasm_i64x2_splat(a);
568 }
570 // CHECK-LABEL: @test_i64x2_extract_lane(
571 // CHECK-NEXT:  entry:
572 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
573 // CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
574 // CHECK-NEXT:    ret i64 [[VECEXT_I]]
575 //
// Wraps wasm_i64x2_extract_lane with lane index 1; the expected IR above
// extracts lane 1 of a viewed as <2 x i64>.
int64_t test_i64x2_extract_lane(v128_t a) {
  return wasm_i64x2_extract_lane(a, 1);
}
580 // CHECK-LABEL: @test_i64x2_replace_lane(
581 // CHECK-NEXT:  entry:
582 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
583 // CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i32 1
584 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
585 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
586 //
// Wraps wasm_i64x2_replace_lane with lane index 1; the expected IR above
// inserts b into lane 1 of a viewed as <2 x i64>.
v128_t test_i64x2_replace_lane(v128_t a, int64_t b) {
  return wasm_i64x2_replace_lane(a, 1, b);
}
591 // CHECK-LABEL: @test_f32x4_splat(
592 // CHECK-NEXT:  entry:
593 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
594 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT_I]] to <4 x i32>
595 // CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32> zeroinitializer
596 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
597 //
test_f32x4_splat(float a)598 v128_t test_f32x4_splat(float a) {
599   return wasm_f32x4_splat(a);
600 }
602 // CHECK-LABEL: @test_f32x4_extract_lane(
603 // CHECK-NEXT:  entry:
604 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
605 // CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
606 // CHECK-NEXT:    ret float [[VECEXT_I]]
607 //
// Wraps wasm_f32x4_extract_lane with lane index 3; the expected IR above
// extracts lane 3 of a viewed as <4 x float>.
float test_f32x4_extract_lane(v128_t a) {
  return wasm_f32x4_extract_lane(a, 3);
}
612 // CHECK-LABEL: @test_f32x4_replace_lane(
613 // CHECK-NEXT:  entry:
614 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
615 // CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP0]], float [[B:%.*]], i32 3
616 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[VECINS_I]] to <4 x i32>
617 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
618 //
// Wraps wasm_f32x4_replace_lane with lane index 3; the expected IR above
// inserts b into lane 3 of a viewed as <4 x float>.
v128_t test_f32x4_replace_lane(v128_t a, float b) {
  return wasm_f32x4_replace_lane(a, 3, b);
}
623 // CHECK-LABEL: @test_f64x2_splat(
624 // CHECK-NEXT:  entry:
625 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
626 // CHECK-NEXT:    [[VECINIT1_I:%.*]] = shufflevector <2 x double> [[VECINIT_I]], <2 x double> poison, <2 x i32> zeroinitializer
627 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32>
628 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
629 //
test_f64x2_splat(double a)630 v128_t test_f64x2_splat(double a) {
631   return wasm_f64x2_splat(a);
632 }
634 // CHECK-LABEL: @test_f64x2_extract_lane(
635 // CHECK-NEXT:  entry:
636 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
637 // CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 1
638 // CHECK-NEXT:    ret double [[VECEXT_I]]
639 //
// Wraps wasm_f64x2_extract_lane with lane index 1; the expected IR above
// extracts lane 1 of a viewed as <2 x double>.
double test_f64x2_extract_lane(v128_t a) {
  return wasm_f64x2_extract_lane(a, 1);
}
644 // CHECK-LABEL: @test_f64x2_replace_lane(
645 // CHECK-NEXT:  entry:
646 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
647 // CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <2 x double> [[TMP0]], double [[B:%.*]], i32 1
648 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[VECINS_I]] to <4 x i32>
649 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
650 //
// Wraps wasm_f64x2_replace_lane with lane index 1; the expected IR above
// inserts b into lane 1 of a viewed as <2 x double>.
v128_t test_f64x2_replace_lane(v128_t a, double b) {
  return wasm_f64x2_replace_lane(a, 1, b);
}
655 // CHECK-LABEL: @test_i8x16_eq(
656 // CHECK-NEXT:  entry:
657 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
658 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
659 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]]
660 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
661 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
662 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
663 //
test_i8x16_eq(v128_t a,v128_t b)664 v128_t test_i8x16_eq(v128_t a, v128_t b) {
665   return wasm_i8x16_eq(a, b); // expected IR: icmp eq on <16 x i8>, i1 mask sign-extended back to i8 lanes
666 }
668 // CHECK-LABEL: @test_i8x16_ne(
669 // CHECK-NEXT:  entry:
670 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
671 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
672 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne <16 x i8> [[TMP0]], [[TMP1]]
673 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
674 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
675 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
676 //
test_i8x16_ne(v128_t a,v128_t b)677 v128_t test_i8x16_ne(v128_t a, v128_t b) {
678   return wasm_i8x16_ne(a, b); // expected IR: icmp ne on <16 x i8>, i1 mask sign-extended back to i8 lanes
679 }
681 // CHECK-LABEL: @test_i8x16_lt(
682 // CHECK-NEXT:  entry:
683 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
684 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
685 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <16 x i8> [[TMP0]], [[TMP1]]
686 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
687 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
688 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
689 //
test_i8x16_lt(v128_t a,v128_t b)690 v128_t test_i8x16_lt(v128_t a, v128_t b) {
691   return wasm_i8x16_lt(a, b); // expected IR: signed compare (icmp slt) on <16 x i8>, then sext of the mask
692 }
694 // CHECK-LABEL: @test_u8x16_lt(
695 // CHECK-NEXT:  entry:
696 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
697 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
698 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ult <16 x i8> [[TMP0]], [[TMP1]]
699 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
700 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
701 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
702 //
test_u8x16_lt(v128_t a,v128_t b)703 v128_t test_u8x16_lt(v128_t a, v128_t b) {
704   return wasm_u8x16_lt(a, b); // expected IR: unsigned compare (icmp ult) on <16 x i8>, then sext of the mask
705 }
707 // CHECK-LABEL: @test_i8x16_gt(
708 // CHECK-NEXT:  entry:
709 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
710 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
711 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt <16 x i8> [[TMP0]], [[TMP1]]
712 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
713 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
714 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
715 //
test_i8x16_gt(v128_t a,v128_t b)716 v128_t test_i8x16_gt(v128_t a, v128_t b) {
717   return wasm_i8x16_gt(a, b); // expected IR: signed compare (icmp sgt) on <16 x i8>, then sext of the mask
718 }
720 // CHECK-LABEL: @test_u8x16_gt(
721 // CHECK-NEXT:  entry:
722 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
723 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
724 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ugt <16 x i8> [[TMP0]], [[TMP1]]
725 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
726 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
727 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
728 //
test_u8x16_gt(v128_t a,v128_t b)729 v128_t test_u8x16_gt(v128_t a, v128_t b) {
730   return wasm_u8x16_gt(a, b); // expected IR: unsigned compare (icmp ugt) on <16 x i8>, then sext of the mask
731 }
733 // CHECK-LABEL: @test_i8x16_le(
734 // CHECK-NEXT:  entry:
735 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
736 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
737 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sle <16 x i8> [[TMP0]], [[TMP1]]
738 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
739 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
740 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
741 //
test_i8x16_le(v128_t a,v128_t b)742 v128_t test_i8x16_le(v128_t a, v128_t b) {
743   return wasm_i8x16_le(a, b); // expected IR: signed compare (icmp sle) on <16 x i8>, then sext of the mask
744 }
746 // CHECK-LABEL: @test_u8x16_le(
747 // CHECK-NEXT:  entry:
748 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
749 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
750 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ule <16 x i8> [[TMP0]], [[TMP1]]
751 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
752 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
753 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
754 //
test_u8x16_le(v128_t a,v128_t b)755 v128_t test_u8x16_le(v128_t a, v128_t b) {
756   return wasm_u8x16_le(a, b); // expected IR: unsigned compare (icmp ule) on <16 x i8>, then sext of the mask
757 }
759 // CHECK-LABEL: @test_i8x16_ge(
760 // CHECK-NEXT:  entry:
761 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
762 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
763 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sge <16 x i8> [[TMP0]], [[TMP1]]
764 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
765 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
766 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
767 //
test_i8x16_ge(v128_t a,v128_t b)768 v128_t test_i8x16_ge(v128_t a, v128_t b) {
769   return wasm_i8x16_ge(a, b); // expected IR: signed compare (icmp sge) on <16 x i8>, then sext of the mask
770 }
772 // CHECK-LABEL: @test_u8x16_ge(
773 // CHECK-NEXT:  entry:
774 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
775 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
776 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp uge <16 x i8> [[TMP0]], [[TMP1]]
777 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
778 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
779 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
780 //
test_u8x16_ge(v128_t a,v128_t b)781 v128_t test_u8x16_ge(v128_t a, v128_t b) {
782   return wasm_u8x16_ge(a, b); // expected IR: unsigned compare (icmp uge) on <16 x i8>, then sext of the mask
783 }
785 // CHECK-LABEL: @test_i16x8_eq(
786 // CHECK-NEXT:  entry:
787 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
788 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
789 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]]
790 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
791 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
792 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
793 //
test_i16x8_eq(v128_t a,v128_t b)794 v128_t test_i16x8_eq(v128_t a, v128_t b) {
795   return wasm_i16x8_eq(a, b); // expected IR: icmp eq on <8 x i16>, i1 mask sign-extended back to i16 lanes
796 }
798 // CHECK-LABEL: @test_i16x8_ne(
799 // CHECK-NEXT:  entry:
800 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
801 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
802 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne <8 x i16> [[TMP0]], [[TMP1]]
803 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
804 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
805 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
806 //
test_i16x8_ne(v128_t a,v128_t b)807 v128_t test_i16x8_ne(v128_t a, v128_t b) {
808   return wasm_i16x8_ne(a, b); // expected IR: icmp ne on <8 x i16>, i1 mask sign-extended back to i16 lanes
809 }
811 // CHECK-LABEL: @test_i16x8_lt(
812 // CHECK-NEXT:  entry:
813 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
814 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
815 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <8 x i16> [[TMP0]], [[TMP1]]
816 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
817 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
818 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
819 //
test_i16x8_lt(v128_t a,v128_t b)820 v128_t test_i16x8_lt(v128_t a, v128_t b) {
821   return wasm_i16x8_lt(a, b); // expected IR: signed compare (icmp slt) on <8 x i16>, then sext of the mask
822 }
824 // CHECK-LABEL: @test_u16x8_lt(
825 // CHECK-NEXT:  entry:
826 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
827 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
828 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ult <8 x i16> [[TMP0]], [[TMP1]]
829 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
830 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
831 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
832 //
test_u16x8_lt(v128_t a,v128_t b)833 v128_t test_u16x8_lt(v128_t a, v128_t b) {
834   return wasm_u16x8_lt(a, b); // expected IR: unsigned compare (icmp ult) on <8 x i16>, then sext of the mask
835 }
837 // CHECK-LABEL: @test_i16x8_gt(
838 // CHECK-NEXT:  entry:
839 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
840 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
841 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt <8 x i16> [[TMP0]], [[TMP1]]
842 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
843 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
844 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
845 //
test_i16x8_gt(v128_t a,v128_t b)846 v128_t test_i16x8_gt(v128_t a, v128_t b) {
847   return wasm_i16x8_gt(a, b); // expected IR: signed compare (icmp sgt) on <8 x i16>, then sext of the mask
848 }
850 // CHECK-LABEL: @test_u16x8_gt(
851 // CHECK-NEXT:  entry:
852 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
853 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
854 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ugt <8 x i16> [[TMP0]], [[TMP1]]
855 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
856 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
857 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
858 //
test_u16x8_gt(v128_t a,v128_t b)859 v128_t test_u16x8_gt(v128_t a, v128_t b) {
860   return wasm_u16x8_gt(a, b); // expected IR: unsigned compare (icmp ugt) on <8 x i16>, then sext of the mask
861 }
863 // CHECK-LABEL: @test_i16x8_le(
864 // CHECK-NEXT:  entry:
865 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
866 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
867 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sle <8 x i16> [[TMP0]], [[TMP1]]
868 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
869 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
870 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
871 //
test_i16x8_le(v128_t a,v128_t b)872 v128_t test_i16x8_le(v128_t a, v128_t b) {
873   return wasm_i16x8_le(a, b); // expected IR: signed compare (icmp sle) on <8 x i16>, then sext of the mask
874 }
876 // CHECK-LABEL: @test_u16x8_le(
877 // CHECK-NEXT:  entry:
878 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
879 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
880 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ule <8 x i16> [[TMP0]], [[TMP1]]
881 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
882 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
883 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
884 //
test_u16x8_le(v128_t a,v128_t b)885 v128_t test_u16x8_le(v128_t a, v128_t b) {
886   return wasm_u16x8_le(a, b); // expected IR: unsigned compare (icmp ule) on <8 x i16>, then sext of the mask
887 }
889 // CHECK-LABEL: @test_i16x8_ge(
890 // CHECK-NEXT:  entry:
891 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
892 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
893 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sge <8 x i16> [[TMP0]], [[TMP1]]
894 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
895 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
896 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
897 //
test_i16x8_ge(v128_t a,v128_t b)898 v128_t test_i16x8_ge(v128_t a, v128_t b) {
899   return wasm_i16x8_ge(a, b); // expected IR: signed compare (icmp sge) on <8 x i16>, then sext of the mask
900 }
902 // CHECK-LABEL: @test_u16x8_ge(
903 // CHECK-NEXT:  entry:
904 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
905 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
906 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp uge <8 x i16> [[TMP0]], [[TMP1]]
907 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
908 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
909 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
910 //
test_u16x8_ge(v128_t a,v128_t b)911 v128_t test_u16x8_ge(v128_t a, v128_t b) {
912   return wasm_u16x8_ge(a, b); // expected IR: unsigned compare (icmp uge) on <8 x i16>, then sext of the mask
913 }
915 // CHECK-LABEL: @test_i32x4_eq(
916 // CHECK-NEXT:  entry:
917 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
918 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
919 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
920 //
test_i32x4_eq(v128_t a,v128_t b)921 v128_t test_i32x4_eq(v128_t a, v128_t b) {
922   return wasm_i32x4_eq(a, b); // expected IR: icmp eq directly on the <4 x i32> arguments (no bitcasts), then sext
923 }
925 // CHECK-LABEL: @test_i32x4_ne(
926 // CHECK-NEXT:  entry:
927 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[B:%.*]]
928 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
929 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
930 //
test_i32x4_ne(v128_t a,v128_t b)931 v128_t test_i32x4_ne(v128_t a, v128_t b) {
932   return wasm_i32x4_ne(a, b); // expected IR: icmp ne directly on the <4 x i32> arguments (no bitcasts), then sext
933 }
935 // CHECK-LABEL: @test_i32x4_lt(
936 // CHECK-NEXT:  entry:
937 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]]
938 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
939 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
940 //
test_i32x4_lt(v128_t a,v128_t b)941 v128_t test_i32x4_lt(v128_t a, v128_t b) {
942   return wasm_i32x4_lt(a, b); // expected IR: signed compare (icmp slt) on <4 x i32>, then sext of the mask
943 }
945 // CHECK-LABEL: @test_u32x4_lt(
946 // CHECK-NEXT:  entry:
947 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ult <4 x i32> [[A:%.*]], [[B:%.*]]
948 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
949 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
950 //
test_u32x4_lt(v128_t a,v128_t b)951 v128_t test_u32x4_lt(v128_t a, v128_t b) {
952   return wasm_u32x4_lt(a, b); // expected IR: unsigned compare (icmp ult) on <4 x i32>, then sext of the mask
953 }
955 // CHECK-LABEL: @test_i32x4_gt(
956 // CHECK-NEXT:  entry:
957 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]]
958 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
959 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
960 //
test_i32x4_gt(v128_t a,v128_t b)961 v128_t test_i32x4_gt(v128_t a, v128_t b) {
962   return wasm_i32x4_gt(a, b); // expected IR: signed compare (icmp sgt) on <4 x i32>, then sext of the mask
963 }
965 // CHECK-LABEL: @test_u32x4_gt(
966 // CHECK-NEXT:  entry:
967 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
968 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
969 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
970 //
test_u32x4_gt(v128_t a,v128_t b)971 v128_t test_u32x4_gt(v128_t a, v128_t b) {
972   return wasm_u32x4_gt(a, b); // expected IR: unsigned compare (icmp ugt) on <4 x i32>, then sext of the mask
973 }
975 // CHECK-LABEL: @test_i32x4_le(
976 // CHECK-NEXT:  entry:
977 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sle <4 x i32> [[A:%.*]], [[B:%.*]]
978 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
979 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
980 //
test_i32x4_le(v128_t a,v128_t b)981 v128_t test_i32x4_le(v128_t a, v128_t b) {
982   return wasm_i32x4_le(a, b); // expected IR: signed compare (icmp sle) on <4 x i32>, then sext of the mask
983 }
985 // CHECK-LABEL: @test_u32x4_le(
986 // CHECK-NEXT:  entry:
987 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]]
988 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
989 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
990 //
test_u32x4_le(v128_t a,v128_t b)991 v128_t test_u32x4_le(v128_t a, v128_t b) {
992   return wasm_u32x4_le(a, b); // expected IR: unsigned compare (icmp ule) on <4 x i32>, then sext of the mask
993 }
995 // CHECK-LABEL: @test_i32x4_ge(
996 // CHECK-NEXT:  entry:
997 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]]
998 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
999 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1000 //
test_i32x4_ge(v128_t a,v128_t b)1001 v128_t test_i32x4_ge(v128_t a, v128_t b) {
1002   return wasm_i32x4_ge(a, b); // expected IR: signed compare (icmp sge) on <4 x i32>, then sext of the mask
1003 }
1005 // CHECK-LABEL: @test_u32x4_ge(
1006 // CHECK-NEXT:  entry:
1007 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[B:%.*]]
1008 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1009 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1010 //
test_u32x4_ge(v128_t a,v128_t b)1011 v128_t test_u32x4_ge(v128_t a, v128_t b) {
1012   return wasm_u32x4_ge(a, b); // expected IR: unsigned compare (icmp uge) on <4 x i32>, then sext of the mask
1013 }
1015 // CHECK-LABEL: @test_i64x2_eq(
1016 // CHECK-NEXT:  entry:
1017 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1018 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
1019 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <2 x i64> [[TMP0]], [[TMP1]]
1020 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1021 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1022 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1023 //
test_i64x2_eq(v128_t a,v128_t b)1024 v128_t test_i64x2_eq(v128_t a, v128_t b) {
1025   return wasm_i64x2_eq(a, b); // expected IR: icmp eq on <2 x i64>, i1 mask sign-extended back to i64 lanes
1026 }
1028 // CHECK-LABEL: @test_i64x2_ne(
1029 // CHECK-NEXT:  entry:
1030 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1031 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
1032 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne <2 x i64> [[TMP0]], [[TMP1]]
1033 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1034 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1035 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1036 //
test_i64x2_ne(v128_t a,v128_t b)1037 v128_t test_i64x2_ne(v128_t a, v128_t b) {
1038   return wasm_i64x2_ne(a, b); // expected IR: icmp ne on <2 x i64>, i1 mask sign-extended back to i64 lanes
1039 }
1041 // CHECK-LABEL: @test_i64x2_lt(
1042 // CHECK-NEXT:  entry:
1043 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1044 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
1045 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <2 x i64> [[TMP0]], [[TMP1]]
1046 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1047 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1048 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1049 //
test_i64x2_lt(v128_t a,v128_t b)1050 v128_t test_i64x2_lt(v128_t a, v128_t b) {
1051   return wasm_i64x2_lt(a, b); // expected IR: signed compare (icmp slt) on <2 x i64>, then sext of the mask
1052 }
1054 // CHECK-LABEL: @test_i64x2_gt(
1055 // CHECK-NEXT:  entry:
1056 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1057 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
1058 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt <2 x i64> [[TMP0]], [[TMP1]]
1059 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1060 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1061 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1062 //
test_i64x2_gt(v128_t a,v128_t b)1063 v128_t test_i64x2_gt(v128_t a, v128_t b) {
1064   return wasm_i64x2_gt(a, b); // expected IR: signed compare (icmp sgt) on <2 x i64>, then sext of the mask
1065 }
1067 // CHECK-LABEL: @test_i64x2_le(
1068 // CHECK-NEXT:  entry:
1069 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1070 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
1071 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sle <2 x i64> [[TMP0]], [[TMP1]]
1072 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1073 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1074 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1075 //
test_i64x2_le(v128_t a,v128_t b)1076 v128_t test_i64x2_le(v128_t a, v128_t b) {
1077   return wasm_i64x2_le(a, b); // expected IR: signed compare (icmp sle) on <2 x i64>, then sext of the mask
1078 }
1080 // CHECK-LABEL: @test_i64x2_ge(
1081 // CHECK-NEXT:  entry:
1082 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1083 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
1084 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sge <2 x i64> [[TMP0]], [[TMP1]]
1085 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1086 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1087 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1088 //
test_i64x2_ge(v128_t a,v128_t b)1089 v128_t test_i64x2_ge(v128_t a, v128_t b) {
1090   return wasm_i64x2_ge(a, b); // expected IR: signed compare (icmp sge) on <2 x i64>, then sext of the mask
1091 }
1093 // CHECK-LABEL: @test_f32x4_eq(
1094 // CHECK-NEXT:  entry:
1095 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
1096 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
1097 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp oeq <4 x float> [[TMP0]], [[TMP1]]
1098 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1099 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1100 //
test_f32x4_eq(v128_t a,v128_t b)1101 v128_t test_f32x4_eq(v128_t a, v128_t b) {
1102   return wasm_f32x4_eq(a, b); // expected IR: fcmp oeq on <4 x float>, mask sign-extended to <4 x i32>
1103 }
1105 // CHECK-LABEL: @test_f32x4_ne(
1106 // CHECK-NEXT:  entry:
1107 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
1108 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
1109 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp une <4 x float> [[TMP0]], [[TMP1]]
1110 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1111 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1112 //
test_f32x4_ne(v128_t a,v128_t b)1113 v128_t test_f32x4_ne(v128_t a, v128_t b) {
1114   return wasm_f32x4_ne(a, b); // expected IR: fcmp une on <4 x float> (the only non-"o" predicate in this group), then sext
1115 }
1117 // CHECK-LABEL: @test_f32x4_lt(
1118 // CHECK-NEXT:  entry:
1119 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
1120 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
1121 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
1122 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1123 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1124 //
test_f32x4_lt(v128_t a,v128_t b)1125 v128_t test_f32x4_lt(v128_t a, v128_t b) {
1126   return wasm_f32x4_lt(a, b); // expected IR: fcmp olt on <4 x float>, mask sign-extended to <4 x i32>
1127 }
1129 // CHECK-LABEL: @test_f32x4_gt(
1130 // CHECK-NEXT:  entry:
1131 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
1132 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
1133 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp ogt <4 x float> [[TMP0]], [[TMP1]]
1134 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1135 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1136 //
test_f32x4_gt(v128_t a,v128_t b)1137 v128_t test_f32x4_gt(v128_t a, v128_t b) {
1138   return wasm_f32x4_gt(a, b); // expected IR: fcmp ogt on <4 x float>, mask sign-extended to <4 x i32>
1139 }
1141 // CHECK-LABEL: @test_f32x4_le(
1142 // CHECK-NEXT:  entry:
1143 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
1144 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
1145 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp ole <4 x float> [[TMP0]], [[TMP1]]
1146 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1147 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1148 //
test_f32x4_le(v128_t a,v128_t b)1149 v128_t test_f32x4_le(v128_t a, v128_t b) {
1150   return wasm_f32x4_le(a, b); // expected IR: fcmp ole on <4 x float>, mask sign-extended to <4 x i32>
1151 }
1153 // CHECK-LABEL: @test_f32x4_ge(
1154 // CHECK-NEXT:  entry:
1155 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
1156 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
1157 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp oge <4 x float> [[TMP0]], [[TMP1]]
1158 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1159 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1160 //
test_f32x4_ge(v128_t a,v128_t b)1161 v128_t test_f32x4_ge(v128_t a, v128_t b) {
1162   return wasm_f32x4_ge(a, b); // expected IR: fcmp oge on <4 x float>, mask sign-extended to <4 x i32>
1163 }
1165 // CHECK-LABEL: @test_f64x2_eq(
1166 // CHECK-NEXT:  entry:
1167 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
1168 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
1169 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp oeq <2 x double> [[TMP0]], [[TMP1]]
1170 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1171 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1172 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1173 //
test_f64x2_eq(v128_t a,v128_t b)1174 v128_t test_f64x2_eq(v128_t a, v128_t b) {
1175   return wasm_f64x2_eq(a, b); // expected IR: fcmp oeq on <2 x double>, mask sign-extended to <2 x i64>
1176 }
1178 // CHECK-LABEL: @test_f64x2_ne(
1179 // CHECK-NEXT:  entry:
1180 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
1181 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
1182 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp une <2 x double> [[TMP0]], [[TMP1]]
1183 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1184 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1185 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1186 //
test_f64x2_ne(v128_t a,v128_t b)1187 v128_t test_f64x2_ne(v128_t a, v128_t b) {
1188   return wasm_f64x2_ne(a, b); // expected IR: fcmp une on <2 x double> (the only non-"o" predicate in this group), then sext
1189 }
1191 // CHECK-LABEL: @test_f64x2_lt(
1192 // CHECK-NEXT:  entry:
1193 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
1194 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
1195 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
1196 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1197 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1198 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1199 //
test_f64x2_lt(v128_t a,v128_t b)1200 v128_t test_f64x2_lt(v128_t a, v128_t b) {
1201   return wasm_f64x2_lt(a, b); // expected IR: fcmp olt on <2 x double>, mask sign-extended to <2 x i64>
1202 }
1204 // CHECK-LABEL: @test_f64x2_gt(
1205 // CHECK-NEXT:  entry:
1206 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
1207 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
1208 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp ogt <2 x double> [[TMP0]], [[TMP1]]
1209 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1210 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1211 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1212 //
test_f64x2_gt(v128_t a,v128_t b)1213 v128_t test_f64x2_gt(v128_t a, v128_t b) {
1214   return wasm_f64x2_gt(a, b); // expected IR: fcmp ogt on <2 x double>, mask sign-extended to <2 x i64>
1215 }
1217 // CHECK-LABEL: @test_f64x2_le(
1218 // CHECK-NEXT:  entry:
1219 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
1220 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
1221 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp ole <2 x double> [[TMP0]], [[TMP1]]
1222 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1223 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1224 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1225 //
test_f64x2_le(v128_t a,v128_t b)1226 v128_t test_f64x2_le(v128_t a, v128_t b) {
1227   return wasm_f64x2_le(a, b); // expected IR: fcmp ole on <2 x double>, mask sign-extended to <2 x i64>
1228 }
1230 // CHECK-LABEL: @test_f64x2_ge(
1231 // CHECK-NEXT:  entry:
1232 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
1233 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
1234 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp oge <2 x double> [[TMP0]], [[TMP1]]
1235 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1236 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
1237 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1238 //
test_f64x2_ge(v128_t a,v128_t b)1239 v128_t test_f64x2_ge(v128_t a, v128_t b) {
1240   return wasm_f64x2_ge(a, b); // expected IR: fcmp oge on <2 x double>, mask sign-extended to <2 x i64>
1241 }
1243 // CHECK-LABEL: @test_v128_not(
1244 // CHECK-NEXT:  entry:
1245 // CHECK-NEXT:    [[NEG_I:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
1246 // CHECK-NEXT:    ret <4 x i32> [[NEG_I]]
1247 //
test_v128_not(v128_t a)1248 v128_t test_v128_not(v128_t a) {
1249   return wasm_v128_not(a); // expected IR: xor of a with -1 in every <4 x i32> lane
1250 }
1252 // CHECK-LABEL: @test_v128_and(
1253 // CHECK-NEXT:  entry:
1254 // CHECK-NEXT:    [[AND_I:%.*]] = and <4 x i32> [[B:%.*]], [[A:%.*]]
1255 // CHECK-NEXT:    ret <4 x i32> [[AND_I]]
1256 //
test_v128_and(v128_t a,v128_t b)1257 v128_t test_v128_and(v128_t a, v128_t b) {
1258   return wasm_v128_and(a, b); // expected IR: a single `and` on <4 x i32>; the operands are matched in the order b, a
1259 }
1261 // CHECK-LABEL: @test_v128_or(
1262 // CHECK-NEXT:  entry:
1263 // CHECK-NEXT:    [[OR_I:%.*]] = or <4 x i32> [[B:%.*]], [[A:%.*]]
1264 // CHECK-NEXT:    ret <4 x i32> [[OR_I]]
1265 //
test_v128_or(v128_t a,v128_t b)1266 v128_t test_v128_or(v128_t a, v128_t b) {
1267   return wasm_v128_or(a, b); // expected IR: a single `or` on <4 x i32>; the operands are matched in the order b, a
1268 }
1270 // CHECK-LABEL: @test_v128_xor(
1271 // CHECK-NEXT:  entry:
1272 // CHECK-NEXT:    [[XOR_I:%.*]] = xor <4 x i32> [[B:%.*]], [[A:%.*]]
1273 // CHECK-NEXT:    ret <4 x i32> [[XOR_I]]
1274 //
test_v128_xor(v128_t a,v128_t b)1275 v128_t test_v128_xor(v128_t a, v128_t b) {
1276   return wasm_v128_xor(a, b); // expected IR: a single `xor` on <4 x i32>; the operands are matched in the order b, a
1277 }
1279 // CHECK-LABEL: @test_v128_andnot(
1280 // CHECK-NEXT:  entry:
1281 // CHECK-NEXT:    [[NEG_I:%.*]] = xor <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
1282 // CHECK-NEXT:    [[AND_I:%.*]] = and <4 x i32> [[NEG_I]], [[A:%.*]]
1283 // CHECK-NEXT:    ret <4 x i32> [[AND_I]]
1284 //
test_v128_andnot(v128_t a,v128_t b)1285 v128_t test_v128_andnot(v128_t a, v128_t b) {
1286   return wasm_v128_andnot(a, b); // expected IR: b is inverted (xor with all -1 lanes) and the result is and-ed with a
1287 }
1289 // CHECK-LABEL: @test_v128_any_true(
1290 // CHECK-NEXT:  entry:
1291 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1292 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR6:[0-9]+]]
1293 // CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
1294 // CHECK-NEXT:    ret i1 [[TOBOOL_I]]
1295 //
test_v128_any_true(v128_t a)1296 bool test_v128_any_true(v128_t a) {
1297   return wasm_v128_any_true(a); // expected IR: call to @llvm.wasm.anytrue.v16i8, i32 result compared ne 0 to yield i1
1298 }
1300 // CHECK-LABEL: @test_v128_bitselect(
1301 // CHECK-NEXT:  entry:
1302 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]]) #[[ATTR6]]
1303 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
1304 //
test_v128_bitselect(v128_t a,v128_t b,v128_t mask)1305 v128_t test_v128_bitselect(v128_t a, v128_t b, v128_t mask) {
1306   return wasm_v128_bitselect(a, b, mask); // expected IR: one call to @llvm.wasm.bitselect.v4i32 with (a, b, mask) in that order
1307 }
1309 // CHECK-LABEL: @test_i8x16_abs(
1310 // CHECK-NEXT:  entry:
1311 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1312 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false) #[[ATTR6]]
1313 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1314 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1315 //
test_i8x16_abs(v128_t a)1316 v128_t test_i8x16_abs(v128_t a) {
1317   return wasm_i8x16_abs(a); // expected IR: call to @llvm.abs.v16i8 with its i1 flag set to false
1318 }
1320 // CHECK-LABEL: @test_i8x16_neg(
1321 // CHECK-NEXT:  entry:
1322 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1323 // CHECK-NEXT:    [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, [[TMP0]]
1324 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32>
1325 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1326 //
test_i8x16_neg(v128_t a)1327 v128_t test_i8x16_neg(v128_t a) {
1328   return wasm_i8x16_neg(a); // expected IR: sub of the <16 x i8> view of a from zeroinitializer
1329 }
1331 // CHECK-LABEL: @test_i8x16_all_true(
1332 // CHECK-NEXT:  entry:
1333 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1334 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR6]]
1335 // CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
1336 // CHECK-NEXT:    ret i1 [[TOBOOL_I]]
1337 //
test_i8x16_all_true(v128_t a)1338 bool test_i8x16_all_true(v128_t a) {
1339   return wasm_i8x16_all_true(a); // expected IR: call to @llvm.wasm.alltrue.v16i8, i32 result compared ne 0 to yield i1
1340 }
1342 // CHECK-LABEL: @test_i8x16_bitmask(
1343 // CHECK-NEXT:  entry:
1344 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1345 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> [[TMP0]]) #[[ATTR6]]
1346 // CHECK-NEXT:    ret i32 [[TMP1]]
1347 //
test_i8x16_bitmask(v128_t a)1348 int32_t test_i8x16_bitmask(v128_t a) {
1349   return wasm_i8x16_bitmask(a); // expected IR: call to @llvm.wasm.bitmask.v16i8 whose i32 result is returned as-is
1350 }
1352 // CHECK-LABEL: @test_i8x16_popcnt(
1353 // CHECK-NEXT:  entry:
1354 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1355 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP0]]) #[[ATTR6]]
1356 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1357 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1358 //
test_i8x16_popcnt(v128_t a)1359 v128_t test_i8x16_popcnt(v128_t a) {
1360   return wasm_i8x16_popcnt(a); // expected IR: call to @llvm.ctpop.v16i8 on the <16 x i8> view of a
1361 }
1363 // CHECK-LABEL: @test_i8x16_shl(
1364 // CHECK-NEXT:  entry:
1365 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1366 // CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
1367 // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i32 0
1368 // CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> undef, <16 x i32> zeroinitializer
1369 // CHECK-NEXT:    [[SHL_I:%.*]] = shl <16 x i8> [[TMP0]], [[SH_PROM_I]]
1370 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[SHL_I]] to <4 x i32>
1371 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1372 //
test_i8x16_shl(v128_t a,int32_t b)1373 v128_t test_i8x16_shl(v128_t a, int32_t b) {
  // Returns wasm_i8x16_shl(a, b). Expected IR above: `b` is truncated to i8,
  // splatted across 16 lanes, and used as the amount of a vector `shl`.
1374   return wasm_i8x16_shl(a, b);
1375 }
1377 // CHECK-LABEL: @test_i8x16_shr(
1378 // CHECK-NEXT:  entry:
1379 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1380 // CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
1381 // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i32 0
1382 // CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> undef, <16 x i32> zeroinitializer
1383 // CHECK-NEXT:    [[SHR_I:%.*]] = ashr <16 x i8> [[TMP0]], [[SH_PROM_I]]
1384 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
1385 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1386 //
test_i8x16_shr(v128_t a,int32_t b)1387 v128_t test_i8x16_shr(v128_t a, int32_t b) {
  // Returns wasm_i8x16_shr(a, b). Expected IR above: `b` is truncated to i8,
  // splatted across 16 lanes, and used as the amount of a vector `ashr`.
1388   return wasm_i8x16_shr(a, b);
1389 }
1391 // CHECK-LABEL: @test_u8x16_shr(
1392 // CHECK-NEXT:  entry:
1393 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1394 // CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
1395 // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i32 0
1396 // CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> undef, <16 x i32> zeroinitializer
1397 // CHECK-NEXT:    [[SHR_I:%.*]] = lshr <16 x i8> [[TMP0]], [[SH_PROM_I]]
1398 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
1399 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1400 //
test_u8x16_shr(v128_t a,int32_t b)1401 v128_t test_u8x16_shr(v128_t a, int32_t b) {
  // Returns wasm_u8x16_shr(a, b). Expected IR above: same splat of `b` as the
  // signed variant, but the shift is a vector `lshr`.
1402   return wasm_u8x16_shr(a, b);
1403 }
1405 // CHECK-LABEL: @test_i8x16_add(
1406 // CHECK-NEXT:  entry:
1407 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1408 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1409 // CHECK-NEXT:    [[ADD_I:%.*]] = add <16 x i8> [[TMP1]], [[TMP0]]
1410 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[ADD_I]] to <4 x i32>
1411 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1412 //
test_i8x16_add(v128_t a,v128_t b)1413 v128_t test_i8x16_add(v128_t a, v128_t b) {
  // Returns wasm_i8x16_add(a, b). Expected IR above: a plain 16 x i8 `add`,
  // with the operands appearing in (b, a) order.
1414   return wasm_i8x16_add(a, b);
1415 }
1417 // CHECK-LABEL: @test_i8x16_add_sat(
1418 // CHECK-NEXT:  entry:
1419 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1420 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1421 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
1422 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1423 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1424 //
test_i8x16_add_sat(v128_t a,v128_t b)1425 v128_t test_i8x16_add_sat(v128_t a, v128_t b) {
  // Returns wasm_i8x16_add_sat(a, b). Expected IR above: a call to
  // @llvm.sadd.sat.v16i8 with operands (a, b).
1426   return wasm_i8x16_add_sat(a, b);
1427 }
1429 // CHECK-LABEL: @test_u8x16_add_sat(
1430 // CHECK-NEXT:  entry:
1431 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1432 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1433 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
1434 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1435 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1436 //
test_u8x16_add_sat(v128_t a,v128_t b)1437 v128_t test_u8x16_add_sat(v128_t a, v128_t b) {
  // Returns wasm_u8x16_add_sat(a, b). Expected IR above: a call to
  // @llvm.uadd.sat.v16i8 with operands (a, b).
1438   return wasm_u8x16_add_sat(a, b);
1439 }
1441 // CHECK-LABEL: @test_i8x16_sub(
1442 // CHECK-NEXT:  entry:
1443 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1444 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1445 // CHECK-NEXT:    [[SUB_I:%.*]] = sub <16 x i8> [[TMP0]], [[TMP1]]
1446 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32>
1447 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1448 //
test_i8x16_sub(v128_t a,v128_t b)1449 v128_t test_i8x16_sub(v128_t a, v128_t b) {
  // Returns wasm_i8x16_sub(a, b). Expected IR above: a plain 16 x i8 `sub`
  // computing a - b.
1450   return wasm_i8x16_sub(a, b);
1451 }
1453 // CHECK-LABEL: @test_i8x16_sub_sat(
1454 // CHECK-NEXT:  entry:
1455 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1456 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1457 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.signed.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
1458 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1459 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1460 //
test_i8x16_sub_sat(v128_t a,v128_t b)1461 v128_t test_i8x16_sub_sat(v128_t a, v128_t b) {
  // Returns wasm_i8x16_sub_sat(a, b). Expected IR above: a call to
  // @llvm.wasm.sub.sat.signed.v16i8 with operands (a, b).
1462   return wasm_i8x16_sub_sat(a, b);
1463 }
1465 // CHECK-LABEL: @test_u8x16_sub_sat(
1466 // CHECK-NEXT:  entry:
1467 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1468 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1469 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
1470 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1471 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1472 //
test_u8x16_sub_sat(v128_t a,v128_t b)1473 v128_t test_u8x16_sub_sat(v128_t a, v128_t b) {
  // Returns wasm_u8x16_sub_sat(a, b). Expected IR above: a call to
  // @llvm.wasm.sub.sat.unsigned.v16i8 with operands (a, b).
1474   return wasm_u8x16_sub_sat(a, b);
1475 }
1477 // CHECK-LABEL: @test_i8x16_min(
1478 // CHECK-NEXT:  entry:
1479 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1480 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1481 // CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <16 x i8> [[TMP0]], [[TMP1]]
1482 // CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
1483 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
1484 // CHECK-NEXT:    ret <4 x i32> [[TMP4]]
1485 //
test_i8x16_min(v128_t a,v128_t b)1486 v128_t test_i8x16_min(v128_t a, v128_t b) {
  // Returns wasm_i8x16_min(a, b). Expected IR above: `icmp slt` on 16 x i8
  // lanes followed by a `select` between a and b.
1487   return wasm_i8x16_min(a, b);
1488 }
1490 // CHECK-LABEL: @test_u8x16_min(
1491 // CHECK-NEXT:  entry:
1492 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1493 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1494 // CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <16 x i8> [[TMP0]], [[TMP1]]
1495 // CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
1496 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
1497 // CHECK-NEXT:    ret <4 x i32> [[TMP4]]
1498 //
test_u8x16_min(v128_t a,v128_t b)1499 v128_t test_u8x16_min(v128_t a, v128_t b) {
  // Returns wasm_u8x16_min(a, b). Expected IR above: `icmp ult` on 16 x i8
  // lanes followed by a `select` between a and b.
1500   return wasm_u8x16_min(a, b);
1501 }
1503 // CHECK-LABEL: @test_i8x16_max(
1504 // CHECK-NEXT:  entry:
1505 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1506 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1507 // CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <16 x i8> [[TMP0]], [[TMP1]]
1508 // CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
1509 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
1510 // CHECK-NEXT:    ret <4 x i32> [[TMP4]]
1511 //
test_i8x16_max(v128_t a,v128_t b)1512 v128_t test_i8x16_max(v128_t a, v128_t b) {
  // Returns wasm_i8x16_max(a, b). Expected IR above: `icmp sgt` on 16 x i8
  // lanes followed by a `select` between a and b.
1513   return wasm_i8x16_max(a, b);
1514 }
1516 // CHECK-LABEL: @test_u8x16_max(
1517 // CHECK-NEXT:  entry:
1518 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1519 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1520 // CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <16 x i8> [[TMP0]], [[TMP1]]
1521 // CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
1522 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
1523 // CHECK-NEXT:    ret <4 x i32> [[TMP4]]
1524 //
test_u8x16_max(v128_t a,v128_t b)1525 v128_t test_u8x16_max(v128_t a, v128_t b) {
  // Returns wasm_u8x16_max(a, b). Expected IR above: `icmp ugt` on 16 x i8
  // lanes followed by a `select` between a and b.
1526   return wasm_u8x16_max(a, b);
1527 }
1529 // CHECK-LABEL: @test_u8x16_avgr(
1530 // CHECK-NEXT:  entry:
1531 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1532 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
1533 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
1534 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1535 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1536 //
test_u8x16_avgr(v128_t a,v128_t b)1537 v128_t test_u8x16_avgr(v128_t a, v128_t b) {
  // Returns wasm_u8x16_avgr(a, b). Expected IR above: a call to
  // @llvm.wasm.avgr.unsigned.v16i8 with operands (a, b).
1538   return wasm_u8x16_avgr(a, b);
1539 }
1541 // CHECK-LABEL: @test_i16x8_abs(
1542 // CHECK-NEXT:  entry:
1543 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1544 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false) #[[ATTR6]]
1545 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
1546 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1547 //
test_i16x8_abs(v128_t a)1548 v128_t test_i16x8_abs(v128_t a) {
  // Returns wasm_i16x8_abs(a). Expected IR above: a call to @llvm.abs.v8i16
  // on `a` viewed as 8 x i16, with the second argument `i1 false`.
1549   return wasm_i16x8_abs(a);
1550 }
1552 // CHECK-LABEL: @test_i16x8_neg(
1553 // CHECK-NEXT:  entry:
1554 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1555 // CHECK-NEXT:    [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, [[TMP0]]
1556 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32>
1557 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1558 //
test_i16x8_neg(v128_t a)1559 v128_t test_i16x8_neg(v128_t a) {
  // Returns wasm_i16x8_neg(a). Expected IR above: `a`, viewed as 8 x i16, is
  // subtracted from zeroinitializer.
1560   return wasm_i16x8_neg(a);
1561 }
1563 // CHECK-LABEL: @test_i16x8_all_true(
1564 // CHECK-NEXT:  entry:
1565 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1566 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v8i16(<8 x i16> [[TMP0]]) #[[ATTR6]]
1567 // CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
1568 // CHECK-NEXT:    ret i1 [[TOBOOL_I]]
1569 //
test_i16x8_all_true(v128_t a)1570 bool test_i16x8_all_true(v128_t a) {
  // Returns wasm_i16x8_all_true(a) as a bool. Expected IR above: a call to
  // @llvm.wasm.alltrue.v8i16 whose i32 result is compared `ne 0`.
1571   return wasm_i16x8_all_true(a);
1572 }
1574 // CHECK-LABEL: @test_i16x8_bitmask(
1575 // CHECK-NEXT:  entry:
1576 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1577 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> [[TMP0]]) #[[ATTR6]]
1578 // CHECK-NEXT:    ret i32 [[TMP1]]
1579 //
test_i16x8_bitmask(v128_t a)1580 int32_t test_i16x8_bitmask(v128_t a) {
  // Returns wasm_i16x8_bitmask(a). Expected IR above: one call to
  // @llvm.wasm.bitmask.v8i16 whose i32 result is returned directly.
1581   return wasm_i16x8_bitmask(a);
1582 }
1584 // CHECK-LABEL: @test_i16x8_shl(
1585 // CHECK-NEXT:  entry:
1586 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1587 // CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
1588 // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i32 0
1589 // CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> zeroinitializer
1590 // CHECK-NEXT:    [[SHL_I:%.*]] = shl <8 x i16> [[TMP0]], [[SH_PROM_I]]
1591 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[SHL_I]] to <4 x i32>
1592 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1593 //
test_i16x8_shl(v128_t a,int32_t b)1594 v128_t test_i16x8_shl(v128_t a, int32_t b) {
  // Returns wasm_i16x8_shl(a, b). Expected IR above: `b` is truncated to i16,
  // splatted across 8 lanes, and used as the amount of a vector `shl`.
1595   return wasm_i16x8_shl(a, b);
1596 }
1598 // CHECK-LABEL: @test_i16x8_shr(
1599 // CHECK-NEXT:  entry:
1600 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1601 // CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
1602 // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i32 0
1603 // CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> zeroinitializer
1604 // CHECK-NEXT:    [[SHR_I:%.*]] = ashr <8 x i16> [[TMP0]], [[SH_PROM_I]]
1605 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
1606 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1607 //
test_i16x8_shr(v128_t a,int32_t b)1608 v128_t test_i16x8_shr(v128_t a, int32_t b) {
  // Returns wasm_i16x8_shr(a, b). Expected IR above: `b` is truncated to i16,
  // splatted across 8 lanes, and used as the amount of a vector `ashr`.
1609   return wasm_i16x8_shr(a, b);
1610 }
1612 // CHECK-LABEL: @test_u16x8_shr(
1613 // CHECK-NEXT:  entry:
1614 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1615 // CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
1616 // CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i32 0
1617 // CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> zeroinitializer
1618 // CHECK-NEXT:    [[SHR_I:%.*]] = lshr <8 x i16> [[TMP0]], [[SH_PROM_I]]
1619 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
1620 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1621 //
test_u16x8_shr(v128_t a,int32_t b)1622 v128_t test_u16x8_shr(v128_t a, int32_t b) {
  // Returns wasm_u16x8_shr(a, b). Expected IR above: same splat of `b` as the
  // signed variant, but the shift is a vector `lshr`.
1623   return wasm_u16x8_shr(a, b);
1624 }
1626 // CHECK-LABEL: @test_i16x8_add(
1627 // CHECK-NEXT:  entry:
1628 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1629 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1630 // CHECK-NEXT:    [[ADD_I:%.*]] = add <8 x i16> [[TMP1]], [[TMP0]]
1631 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[ADD_I]] to <4 x i32>
1632 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1633 //
test_i16x8_add(v128_t a,v128_t b)1634 v128_t test_i16x8_add(v128_t a, v128_t b) {
  // Returns wasm_i16x8_add(a, b). Expected IR above: a plain 8 x i16 `add`,
  // with the operands appearing in (b, a) order.
1635   return wasm_i16x8_add(a, b);
1636 }
1638 // CHECK-LABEL: @test_i16x8_add_sat(
1639 // CHECK-NEXT:  entry:
1640 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1641 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1642 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
1643 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1644 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1645 //
test_i16x8_add_sat(v128_t a,v128_t b)1646 v128_t test_i16x8_add_sat(v128_t a, v128_t b) {
  // Returns wasm_i16x8_add_sat(a, b). Expected IR above: a call to
  // @llvm.sadd.sat.v8i16 with operands (a, b).
1647   return wasm_i16x8_add_sat(a, b);
1648 }
1650 // CHECK-LABEL: @test_u16x8_add_sat(
1651 // CHECK-NEXT:  entry:
1652 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1653 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1654 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
1655 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1656 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1657 //
test_u16x8_add_sat(v128_t a,v128_t b)1658 v128_t test_u16x8_add_sat(v128_t a, v128_t b) {
  // Returns wasm_u16x8_add_sat(a, b). Expected IR above: a call to
  // @llvm.uadd.sat.v8i16 with operands (a, b).
1659   return wasm_u16x8_add_sat(a, b);
1660 }
1662 // CHECK-LABEL: @test_i16x8_sub(
1663 // CHECK-NEXT:  entry:
1664 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1665 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1666 // CHECK-NEXT:    [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
1667 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32>
1668 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1669 //
test_i16x8_sub(v128_t a,v128_t b)1670 v128_t test_i16x8_sub(v128_t a, v128_t b) {
  // Returns wasm_i16x8_sub(a, b). Expected IR above: a plain 8 x i16 `sub`
  // computing a - b.
1671   return wasm_i16x8_sub(a, b);
1672 }
1674 // CHECK-LABEL: @test_i16x8_sub_sat(
1675 // CHECK-NEXT:  entry:
1676 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1677 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1678 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.signed.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
1679 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1680 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1681 //
test_i16x8_sub_sat(v128_t a,v128_t b)1682 v128_t test_i16x8_sub_sat(v128_t a, v128_t b) {
  // Returns wasm_i16x8_sub_sat(a, b). Expected IR above: a call to
  // @llvm.wasm.sub.sat.signed.v8i16 with operands (a, b).
1683   return wasm_i16x8_sub_sat(a, b);
1684 }
1686 // CHECK-LABEL: @test_u16x8_sub_sat(
1687 // CHECK-NEXT:  entry:
1688 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1689 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1690 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
1691 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1692 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1693 //
test_u16x8_sub_sat(v128_t a,v128_t b)1694 v128_t test_u16x8_sub_sat(v128_t a, v128_t b) {
  // Returns wasm_u16x8_sub_sat(a, b). Expected IR above: a call to
  // @llvm.wasm.sub.sat.unsigned.v8i16 with operands (a, b).
1695   return wasm_u16x8_sub_sat(a, b);
1696 }
1698 // CHECK-LABEL: @test_i16x8_mul(
1699 // CHECK-NEXT:  entry:
1700 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1701 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1702 // CHECK-NEXT:    [[MUL_I:%.*]] = mul <8 x i16> [[TMP1]], [[TMP0]]
1703 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
1704 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1705 //
test_i16x8_mul(v128_t a,v128_t b)1706 v128_t test_i16x8_mul(v128_t a, v128_t b) {
  // Returns wasm_i16x8_mul(a, b). Expected IR above: a plain 8 x i16 `mul`,
  // with the operands appearing in (b, a) order.
1707   return wasm_i16x8_mul(a, b);
1708 }
1710 // CHECK-LABEL: @test_i16x8_min(
1711 // CHECK-NEXT:  entry:
1712 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1713 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1714 // CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP0]], [[TMP1]]
1715 // CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
1716 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
1717 // CHECK-NEXT:    ret <4 x i32> [[TMP4]]
1718 //
test_i16x8_min(v128_t a,v128_t b)1719 v128_t test_i16x8_min(v128_t a, v128_t b) {
  // Returns wasm_i16x8_min(a, b). Expected IR above: `icmp slt` on 8 x i16
  // lanes followed by a `select` between a and b.
1720   return wasm_i16x8_min(a, b);
1721 }
1723 // CHECK-LABEL: @test_u16x8_min(
1724 // CHECK-NEXT:  entry:
1725 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1726 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1727 // CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP0]], [[TMP1]]
1728 // CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
1729 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
1730 // CHECK-NEXT:    ret <4 x i32> [[TMP4]]
1731 //
test_u16x8_min(v128_t a,v128_t b)1732 v128_t test_u16x8_min(v128_t a, v128_t b) {
  // Returns wasm_u16x8_min(a, b). Expected IR above: `icmp ult` on 8 x i16
  // lanes followed by a `select` between a and b.
1733   return wasm_u16x8_min(a, b);
1734 }
1736 // CHECK-LABEL: @test_i16x8_max(
1737 // CHECK-NEXT:  entry:
1738 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1739 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1740 // CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP0]], [[TMP1]]
1741 // CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
1742 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
1743 // CHECK-NEXT:    ret <4 x i32> [[TMP4]]
1744 //
test_i16x8_max(v128_t a,v128_t b)1745 v128_t test_i16x8_max(v128_t a, v128_t b) {
  // Returns wasm_i16x8_max(a, b). Expected IR above: `icmp sgt` on 8 x i16
  // lanes followed by a `select` between a and b.
1746   return wasm_i16x8_max(a, b);
1747 }
1749 // CHECK-LABEL: @test_u16x8_max(
1750 // CHECK-NEXT:  entry:
1751 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1752 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1753 // CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP0]], [[TMP1]]
1754 // CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
1755 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
1756 // CHECK-NEXT:    ret <4 x i32> [[TMP4]]
1757 //
test_u16x8_max(v128_t a,v128_t b)1758 v128_t test_u16x8_max(v128_t a, v128_t b) {
  // Returns wasm_u16x8_max(a, b). Expected IR above: `icmp ugt` on 8 x i16
  // lanes followed by a `select` between a and b.
1759   return wasm_u16x8_max(a, b);
1760 }
1762 // CHECK-LABEL: @test_u16x8_avgr(
1763 // CHECK-NEXT:  entry:
1764 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1765 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1766 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
1767 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
1768 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1769 //
test_u16x8_avgr(v128_t a,v128_t b)1770 v128_t test_u16x8_avgr(v128_t a, v128_t b) {
  // Returns wasm_u16x8_avgr(a, b). Expected IR above: a call to
  // @llvm.wasm.avgr.unsigned.v8i16 with operands (a, b).
1771   return wasm_u16x8_avgr(a, b);
1772 }
1774 // CHECK-LABEL: @test_i32x4_abs(
1775 // CHECK-NEXT:  entry:
1776 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false) #[[ATTR6]]
1777 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
1778 //
test_i32x4_abs(v128_t a)1779 v128_t test_i32x4_abs(v128_t a) {
  // Returns wasm_i32x4_abs(a). Expected IR above: a call to @llvm.abs.v4i32
  // directly on `a` (no lane casts), with the second argument `i1 false`.
1780   return wasm_i32x4_abs(a);
1781 }
1783 // CHECK-LABEL: @test_i32x4_neg(
1784 // CHECK-NEXT:  entry:
1785 // CHECK-NEXT:    [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]]
1786 // CHECK-NEXT:    ret <4 x i32> [[SUB_I]]
1787 //
test_i32x4_neg(v128_t a)1788 v128_t test_i32x4_neg(v128_t a) {
  // Returns wasm_i32x4_neg(a). Expected IR above: `a` is subtracted from a
  // 4 x i32 zeroinitializer, with no lane casts.
1789   return wasm_i32x4_neg(a);
1790 }
1792 // CHECK-LABEL: @test_i32x4_all_true(
1793 // CHECK-NEXT:  entry:
1794 // CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR6]]
1795 // CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0
1796 // CHECK-NEXT:    ret i1 [[TOBOOL_I]]
1797 //
test_i32x4_all_true(v128_t a)1798 bool test_i32x4_all_true(v128_t a) {
  // Returns wasm_i32x4_all_true(a) as a bool. Expected IR above: a call to
  // @llvm.wasm.alltrue.v4i32 whose i32 result is compared `ne 0`.
1799   return wasm_i32x4_all_true(a);
1800 }
1802 // CHECK-LABEL: @test_i32x4_bitmask(
1803 // CHECK-NEXT:  entry:
1804 // CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR6]]
1805 // CHECK-NEXT:    ret i32 [[TMP0]]
1806 //
test_i32x4_bitmask(v128_t a)1807 int32_t test_i32x4_bitmask(v128_t a) {
  // Returns wasm_i32x4_bitmask(a). Expected IR above: one call to
  // @llvm.wasm.bitmask.v4i32 whose i32 result is returned directly.
1808   return wasm_i32x4_bitmask(a);
1809 }
1811 // CHECK-LABEL: @test_i32x4_shl(
1812 // CHECK-NEXT:  entry:
1813 // CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
1814 // CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
1815 // CHECK-NEXT:    [[SHL_I:%.*]] = shl <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
1816 // CHECK-NEXT:    ret <4 x i32> [[SHL_I]]
1817 //
test_i32x4_shl(v128_t a,int32_t b)1818 v128_t test_i32x4_shl(v128_t a, int32_t b) {
  // Returns wasm_i32x4_shl(a, b). Expected IR above: `b` is splatted across
  // 4 x i32 lanes (no truncation) and used as the amount of a vector `shl`.
1819   return wasm_i32x4_shl(a, b);
1820 }
1822 // CHECK-LABEL: @test_i32x4_shr(
1823 // CHECK-NEXT:  entry:
1824 // CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
1825 // CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
1826 // CHECK-NEXT:    [[SHR_I:%.*]] = ashr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
1827 // CHECK-NEXT:    ret <4 x i32> [[SHR_I]]
1828 //
test_i32x4_shr(v128_t a,int32_t b)1829 v128_t test_i32x4_shr(v128_t a, int32_t b) {
  // Returns wasm_i32x4_shr(a, b). Expected IR above: `b` is splatted across
  // 4 x i32 lanes and used as the amount of a vector `ashr`.
1830   return wasm_i32x4_shr(a, b);
1831 }
1833 // CHECK-LABEL: @test_u32x4_shr(
1834 // CHECK-NEXT:  entry:
1835 // CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
1836 // CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
1837 // CHECK-NEXT:    [[SHR_I:%.*]] = lshr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
1838 // CHECK-NEXT:    ret <4 x i32> [[SHR_I]]
1839 //
test_u32x4_shr(v128_t a,int32_t b)1840 v128_t test_u32x4_shr(v128_t a, int32_t b) {
  // Returns wasm_u32x4_shr(a, b). Expected IR above: `b` is splatted across
  // 4 x i32 lanes and used as the amount of a vector `lshr`.
1841   return wasm_u32x4_shr(a, b);
1842 }
1844 // CHECK-LABEL: @test_i32x4_add(
1845 // CHECK-NEXT:  entry:
1846 // CHECK-NEXT:    [[ADD_I:%.*]] = add <4 x i32> [[B:%.*]], [[A:%.*]]
1847 // CHECK-NEXT:    ret <4 x i32> [[ADD_I]]
1848 //
test_i32x4_add(v128_t a,v128_t b)1849 v128_t test_i32x4_add(v128_t a, v128_t b) {
  // Returns wasm_i32x4_add(a, b). Expected IR above: a single 4 x i32 `add`,
  // with the operands appearing in (b, a) order.
1850   return wasm_i32x4_add(a, b);
1851 }
1853 // CHECK-LABEL: @test_i32x4_sub(
1854 // CHECK-NEXT:  entry:
1855 // CHECK-NEXT:    [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[B:%.*]]
1856 // CHECK-NEXT:    ret <4 x i32> [[SUB_I]]
1857 //
test_i32x4_sub(v128_t a,v128_t b)1858 v128_t test_i32x4_sub(v128_t a, v128_t b) {
  // Returns wasm_i32x4_sub(a, b). Expected IR above: a single 4 x i32 `sub`
  // computing a - b.
1859   return wasm_i32x4_sub(a, b);
1860 }
1862 // CHECK-LABEL: @test_i32x4_mul(
1863 // CHECK-NEXT:  entry:
1864 // CHECK-NEXT:    [[MUL_I:%.*]] = mul <4 x i32> [[B:%.*]], [[A:%.*]]
1865 // CHECK-NEXT:    ret <4 x i32> [[MUL_I]]
1866 //
test_i32x4_mul(v128_t a,v128_t b)1867 v128_t test_i32x4_mul(v128_t a, v128_t b) {
  // Returns wasm_i32x4_mul(a, b). Expected IR above: a single 4 x i32 `mul`,
  // with the operands appearing in (b, a) order.
1868   return wasm_i32x4_mul(a, b);
1869 }
1871 // CHECK-LABEL: @test_i32x4_min(
1872 // CHECK-NEXT:  entry:
1873 // CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]]
1874 // CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
1875 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1876 //
test_i32x4_min(v128_t a,v128_t b)1877 v128_t test_i32x4_min(v128_t a, v128_t b) {
  // Returns wasm_i32x4_min(a, b). Expected IR above: `icmp slt` on 4 x i32
  // lanes followed by a `select` between a and b.
1878   return wasm_i32x4_min(a, b);
1879 }
1881 // CHECK-LABEL: @test_u32x4_min(
1882 // CHECK-NEXT:  entry:
1883 // CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[A:%.*]], [[B:%.*]]
1884 // CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
1885 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1886 //
test_u32x4_min(v128_t a,v128_t b)1887 v128_t test_u32x4_min(v128_t a, v128_t b) {
  // Returns wasm_u32x4_min(a, b). Expected IR above: `icmp ult` on 4 x i32
  // lanes followed by a `select` between a and b.
1888   return wasm_u32x4_min(a, b);
1889 }
1891 // CHECK-LABEL: @test_i32x4_max(
1892 // CHECK-NEXT:  entry:
1893 // CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]]
1894 // CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
1895 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1896 //
test_i32x4_max(v128_t a,v128_t b)1897 v128_t test_i32x4_max(v128_t a, v128_t b) {
  // Returns wasm_i32x4_max(a, b). Expected IR above: `icmp sgt` on 4 x i32
  // lanes followed by a `select` between a and b.
1898   return wasm_i32x4_max(a, b);
1899 }
1901 // CHECK-LABEL: @test_u32x4_max(
1902 // CHECK-NEXT:  entry:
1903 // CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
1904 // CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
1905 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1906 //
test_u32x4_max(v128_t a,v128_t b)1907 v128_t test_u32x4_max(v128_t a, v128_t b) {
  // Returns wasm_u32x4_max(a, b). Expected IR above: `icmp ugt` on 4 x i32
  // lanes followed by a `select` between a and b.
1908   return wasm_u32x4_max(a, b);
1909 }
1911 // CHECK-LABEL: @test_i32x4_dot_i16x8(
1912 // CHECK-NEXT:  entry:
1913 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1914 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1915 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.dot(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
1916 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1917 //
test_i32x4_dot_i16x8(v128_t a,v128_t b)1918 v128_t test_i32x4_dot_i16x8(v128_t a, v128_t b) {
  // Returns wasm_i32x4_dot_i16x8(a, b). Expected IR above: both operands are
  // viewed as 8 x i16 and passed to @llvm.wasm.dot, which yields 4 x i32.
1919   return wasm_i32x4_dot_i16x8(a, b);
1920 }
1922 // CHECK-LABEL: @test_i64x2_abs(
1923 // CHECK-NEXT:  entry:
1924 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1925 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false) #[[ATTR6]]
1926 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
1927 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1928 //
test_i64x2_abs(v128_t a)1929 v128_t test_i64x2_abs(v128_t a) {
  // Returns wasm_i64x2_abs(a). Expected IR above: a call to @llvm.abs.v2i64
  // on `a` viewed as 2 x i64, with the second argument `i1 false`.
1930   return wasm_i64x2_abs(a);
1931 }
1933 // CHECK-LABEL: @test_i64x2_neg(
1934 // CHECK-NEXT:  entry:
1935 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1936 // CHECK-NEXT:    [[SUB_I:%.*]] = sub <2 x i64> zeroinitializer, [[TMP0]]
1937 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32>
1938 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1939 //
test_i64x2_neg(v128_t a)1940 v128_t test_i64x2_neg(v128_t a) {
  // Returns wasm_i64x2_neg(a). Expected IR above: `a`, viewed as 2 x i64, is
  // subtracted from zeroinitializer.
1941   return wasm_i64x2_neg(a);
1942 }
1944 // CHECK-LABEL: @test_i64x2_all_true(
1945 // CHECK-NEXT:  entry:
1946 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1947 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> [[TMP0]]) #[[ATTR6]]
1948 // CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
1949 // CHECK-NEXT:    ret i1 [[TOBOOL_I]]
1950 //
test_i64x2_all_true(v128_t a)1951 bool test_i64x2_all_true(v128_t a) {
  // Returns wasm_i64x2_all_true(a) as a bool. Expected IR above: a call to
  // @llvm.wasm.alltrue.v2i64 whose i32 result is compared `ne 0`.
1952   return wasm_i64x2_all_true(a);
1953 }
1955 // CHECK-LABEL: @test_i64x2_bitmask(
1956 // CHECK-NEXT:  entry:
1957 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1958 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> [[TMP0]]) #[[ATTR6]]
1959 // CHECK-NEXT:    ret i32 [[TMP1]]
1960 //
test_i64x2_bitmask(v128_t a)1961 int32_t test_i64x2_bitmask(v128_t a) {
  // Returns wasm_i64x2_bitmask(a). Expected IR above: one call to
  // @llvm.wasm.bitmask.v2i64 whose i32 result is returned directly.
1962   return wasm_i64x2_bitmask(a);
1963 }
1965 // CHECK-LABEL: @test_i64x2_shl(
1966 // CHECK-NEXT:  entry:
1967 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1968 // CHECK-NEXT:    [[CONV_I:%.*]] = sext i32 [[B:%.*]] to i64
1969 // CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i32 0
1970 // CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
1971 // CHECK-NEXT:    [[SHL_I:%.*]] = shl <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
1972 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SHL_I]] to <4 x i32>
1973 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1974 //
test_i64x2_shl(v128_t a,int32_t b)1975 v128_t test_i64x2_shl(v128_t a, int32_t b) {
  // Returns wasm_i64x2_shl(a, b). Expected IR above: `b` is sign-extended to
  // i64, splatted across 2 lanes, and used as the amount of a vector `shl`.
1976   return wasm_i64x2_shl(a, b);
1977 }
1979 // CHECK-LABEL: @test_i64x2_shr(
1980 // CHECK-NEXT:  entry:
1981 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1982 // CHECK-NEXT:    [[CONV_I:%.*]] = sext i32 [[B:%.*]] to i64
1983 // CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i32 0
1984 // CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
1985 // CHECK-NEXT:    [[SHR_I:%.*]] = ashr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
1986 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
1987 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1988 //
test_i64x2_shr(v128_t a,int32_t b)1989 v128_t test_i64x2_shr(v128_t a, int32_t b) {
  // Returns wasm_i64x2_shr(a, b). Expected IR above: `b` is sign-extended to
  // i64, splatted across 2 lanes, and used as the amount of a vector `ashr`.
1990   return wasm_i64x2_shr(a, b);
1991 }
1993 // CHECK-LABEL: @test_u64x2_shr(
1994 // CHECK-NEXT:  entry:
1995 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
1996 // CHECK-NEXT:    [[CONV_I:%.*]] = sext i32 [[B:%.*]] to i64
1997 // CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i32 0
1998 // CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
1999 // CHECK-NEXT:    [[SHR_I:%.*]] = lshr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
2000 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
2001 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2002 //
test_u64x2_shr(v128_t a,int32_t b)2003 v128_t test_u64x2_shr(v128_t a, int32_t b) {
  // Returns wasm_u64x2_shr(a, b). Expected IR above: `b` is still
  // sign-extended to i64 and splatted, but the shift is a vector `lshr`.
2004   return wasm_u64x2_shr(a, b);
2005 }
2007 // CHECK-LABEL: @test_i64x2_add(
2008 // CHECK-NEXT:  entry:
2009 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
2010 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
2011 // CHECK-NEXT:    [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP0]]
2012 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[ADD_I]] to <4 x i32>
2013 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2014 //
test_i64x2_add(v128_t a,v128_t b)2015 v128_t test_i64x2_add(v128_t a, v128_t b) {
  // Returns wasm_i64x2_add(a, b). Expected IR above: a plain 2 x i64 `add`,
  // with the operands appearing in (b, a) order.
2016   return wasm_i64x2_add(a, b);
2017 }
2019 // CHECK-LABEL: @test_i64x2_sub(
2020 // CHECK-NEXT:  entry:
2021 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
2022 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
2023 // CHECK-NEXT:    [[SUB_I:%.*]] = sub <2 x i64> [[TMP0]], [[TMP1]]
2024 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32>
2025 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2026 //
v128_t test_i64x2_sub(v128_t a, v128_t b) {
  // Subtract `b` from `a` as i64x2; the checked IR is `sub <2 x i64>`.
  v128_t difference = wasm_i64x2_sub(a, b);
  return difference;
}
2031 // CHECK-LABEL: @test_i64x2_mul(
2032 // CHECK-NEXT:  entry:
2033 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
2034 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
2035 // CHECK-NEXT:    [[MUL_I:%.*]] = mul <2 x i64> [[TMP1]], [[TMP0]]
2036 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
2037 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2038 //
v128_t test_i64x2_mul(v128_t a, v128_t b) {
  // Multiply `a` and `b` as i64x2; the checked IR is `mul <2 x i64>`.
  v128_t product = wasm_i64x2_mul(a, b);
  return product;
}
2043 // CHECK-LABEL: @test_f32x4_abs(
2044 // CHECK-NEXT:  entry:
2045 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2046 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2047 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
2048 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2049 //
v128_t test_f32x4_abs(v128_t a) {
  // f32x4 absolute value; the checked IR calls llvm.fabs.v4f32.
  v128_t magnitude = wasm_f32x4_abs(a);
  return magnitude;
}
2054 // CHECK-LABEL: @test_f32x4_neg(
2055 // CHECK-NEXT:  entry:
2056 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2057 // CHECK-NEXT:    [[FNEG_I:%.*]] = fneg <4 x float> [[TMP0]]
2058 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <4 x i32>
2059 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2060 //
v128_t test_f32x4_neg(v128_t a) {
  // f32x4 negation; the checked IR is an `fneg <4 x float>`.
  v128_t negated = wasm_f32x4_neg(a);
  return negated;
}
2065 // CHECK-LABEL: @test_f32x4_sqrt(
2066 // CHECK-NEXT:  entry:
2067 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2068 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2069 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
2070 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2071 //
v128_t test_f32x4_sqrt(v128_t a) {
  // f32x4 square root; the checked IR calls llvm.sqrt.v4f32.
  v128_t root = wasm_f32x4_sqrt(a);
  return root;
}
2076 // CHECK-LABEL: @test_f32x4_ceil(
2077 // CHECK-NEXT:  entry:
2078 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2079 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2080 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
2081 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2082 //
v128_t test_f32x4_ceil(v128_t a) {
  // f32x4 ceiling; the checked IR calls llvm.ceil.v4f32.
  v128_t rounded_up = wasm_f32x4_ceil(a);
  return rounded_up;
}
2087 // CHECK-LABEL: @test_f32x4_floor(
2088 // CHECK-NEXT:  entry:
2089 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2090 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2091 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
2092 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2093 //
v128_t test_f32x4_floor(v128_t a) {
  // f32x4 floor; the checked IR calls llvm.floor.v4f32.
  v128_t rounded_down = wasm_f32x4_floor(a);
  return rounded_down;
}
2098 // CHECK-LABEL: @test_f32x4_trunc(
2099 // CHECK-NEXT:  entry:
2100 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2101 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2102 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
2103 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2104 //
v128_t test_f32x4_trunc(v128_t a) {
  // f32x4 truncation; the checked IR calls llvm.trunc.v4f32.
  v128_t truncated = wasm_f32x4_trunc(a);
  return truncated;
}
2109 // CHECK-LABEL: @test_f32x4_nearest(
2110 // CHECK-NEXT:  entry:
2111 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2112 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2113 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
2114 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2115 //
v128_t test_f32x4_nearest(v128_t a) {
  // f32x4 round-to-nearest; the checked IR calls llvm.nearbyint.v4f32.
  v128_t nearest = wasm_f32x4_nearest(a);
  return nearest;
}
2120 // CHECK-LABEL: @test_f32x4_add(
2121 // CHECK-NEXT:  entry:
2122 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2123 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2124 // CHECK-NEXT:    [[ADD_I:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
2125 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[ADD_I]] to <4 x i32>
2126 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2127 //
v128_t test_f32x4_add(v128_t a, v128_t b) {
  // f32x4 addition; the checked IR is `fadd <4 x float>`.
  v128_t sum = wasm_f32x4_add(a, b);
  return sum;
}
2132 // CHECK-LABEL: @test_f32x4_sub(
2133 // CHECK-NEXT:  entry:
2134 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2135 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2136 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <4 x float> [[TMP0]], [[TMP1]]
2137 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[SUB_I]] to <4 x i32>
2138 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2139 //
v128_t test_f32x4_sub(v128_t a, v128_t b) {
  // f32x4 subtraction (a - b); the checked IR is `fsub <4 x float>`.
  v128_t difference = wasm_f32x4_sub(a, b);
  return difference;
}
2144 // CHECK-LABEL: @test_f32x4_mul(
2145 // CHECK-NEXT:  entry:
2146 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2147 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2148 // CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
2149 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[MUL_I]] to <4 x i32>
2150 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2151 //
v128_t test_f32x4_mul(v128_t a, v128_t b) {
  // f32x4 multiplication; the checked IR is `fmul <4 x float>`.
  v128_t product = wasm_f32x4_mul(a, b);
  return product;
}
2156 // CHECK-LABEL: @test_f32x4_div(
2157 // CHECK-NEXT:  entry:
2158 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2159 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2160 // CHECK-NEXT:    [[DIV_I:%.*]] = fdiv <4 x float> [[TMP0]], [[TMP1]]
2161 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[DIV_I]] to <4 x i32>
2162 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2163 //
v128_t test_f32x4_div(v128_t a, v128_t b) {
  // f32x4 division (a / b); the checked IR is `fdiv <4 x float>`.
  v128_t quotient = wasm_f32x4_div(a, b);
  return quotient;
}
2168 // CHECK-LABEL: @test_f32x4_min(
2169 // CHECK-NEXT:  entry:
2170 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2171 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2172 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR6]]
2173 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
2174 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2175 //
v128_t test_f32x4_min(v128_t a, v128_t b) {
  // f32x4 minimum; the checked IR calls llvm.minimum.v4f32.
  v128_t smaller = wasm_f32x4_min(a, b);
  return smaller;
}
2180 // CHECK-LABEL: @test_f32x4_max(
2181 // CHECK-NEXT:  entry:
2182 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2183 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2184 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR6]]
2185 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
2186 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2187 //
v128_t test_f32x4_max(v128_t a, v128_t b) {
  // f32x4 maximum; the checked IR calls llvm.maximum.v4f32.
  v128_t larger = wasm_f32x4_max(a, b);
  return larger;
}
2192 // CHECK-LABEL: @test_f32x4_pmin(
2193 // CHECK-NEXT:  entry:
2194 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2195 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2196 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
2197 // CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[CMP_I]], <4 x i32> [[B]], <4 x i32> [[A]]
2198 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2199 //
v128_t test_f32x4_pmin(v128_t a, v128_t b) {
  // f32x4 pseudo-minimum; the checked IR compares `b < a` (fcmp olt) and
  // selects `b` when true, otherwise `a`.
  v128_t picked = wasm_f32x4_pmin(a, b);
  return picked;
}
2204 // CHECK-LABEL: @test_f32x4_pmax(
2205 // CHECK-NEXT:  entry:
2206 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2207 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
2208 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
2209 // CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[CMP_I]], <4 x i32> [[B]], <4 x i32> [[A]]
2210 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2211 //
v128_t test_f32x4_pmax(v128_t a, v128_t b) {
  // f32x4 pseudo-maximum; the checked IR compares `a < b` (fcmp olt) and
  // selects `b` when true, otherwise `a`.
  v128_t picked = wasm_f32x4_pmax(a, b);
  return picked;
}
2216 // CHECK-LABEL: @test_f64x2_abs(
2217 // CHECK-NEXT:  entry:
2218 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2219 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2220 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
2221 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2222 //
v128_t test_f64x2_abs(v128_t a) {
  // f64x2 absolute value; the checked IR calls llvm.fabs.v2f64.
  v128_t magnitude = wasm_f64x2_abs(a);
  return magnitude;
}
2227 // CHECK-LABEL: @test_f64x2_neg(
2228 // CHECK-NEXT:  entry:
2229 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2230 // CHECK-NEXT:    [[FNEG_I:%.*]] = fneg <2 x double> [[TMP0]]
2231 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[FNEG_I]] to <4 x i32>
2232 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2233 //
v128_t test_f64x2_neg(v128_t a) {
  // f64x2 negation; the checked IR is an `fneg <2 x double>`.
  v128_t negated = wasm_f64x2_neg(a);
  return negated;
}
2238 // CHECK-LABEL: @test_f64x2_sqrt(
2239 // CHECK-NEXT:  entry:
2240 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2241 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2242 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
2243 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2244 //
v128_t test_f64x2_sqrt(v128_t a) {
  // f64x2 square root; the checked IR calls llvm.sqrt.v2f64.
  v128_t root = wasm_f64x2_sqrt(a);
  return root;
}
2249 // CHECK-LABEL: @test_f64x2_ceil(
2250 // CHECK-NEXT:  entry:
2251 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2252 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2253 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
2254 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2255 //
v128_t test_f64x2_ceil(v128_t a) {
  // f64x2 ceiling; the checked IR calls llvm.ceil.v2f64.
  v128_t rounded_up = wasm_f64x2_ceil(a);
  return rounded_up;
}
2260 // CHECK-LABEL: @test_f64x2_floor(
2261 // CHECK-NEXT:  entry:
2262 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2263 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2264 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
2265 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2266 //
v128_t test_f64x2_floor(v128_t a) {
  // f64x2 floor; the checked IR calls llvm.floor.v2f64.
  v128_t rounded_down = wasm_f64x2_floor(a);
  return rounded_down;
}
2271 // CHECK-LABEL: @test_f64x2_trunc(
2272 // CHECK-NEXT:  entry:
2273 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2274 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2275 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
2276 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2277 //
v128_t test_f64x2_trunc(v128_t a) {
  // f64x2 truncation; the checked IR calls llvm.trunc.v2f64.
  v128_t truncated = wasm_f64x2_trunc(a);
  return truncated;
}
2282 // CHECK-LABEL: @test_f64x2_nearest(
2283 // CHECK-NEXT:  entry:
2284 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2285 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2286 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
2287 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2288 //
v128_t test_f64x2_nearest(v128_t a) {
  // f64x2 round-to-nearest; the checked IR calls llvm.nearbyint.v2f64.
  v128_t nearest = wasm_f64x2_nearest(a);
  return nearest;
}
2293 // CHECK-LABEL: @test_f64x2_add(
2294 // CHECK-NEXT:  entry:
2295 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2296 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2297 // CHECK-NEXT:    [[ADD_I:%.*]] = fadd <2 x double> [[TMP0]], [[TMP1]]
2298 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[ADD_I]] to <4 x i32>
2299 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2300 //
v128_t test_f64x2_add(v128_t a, v128_t b) {
  // f64x2 addition; the checked IR is `fadd <2 x double>`.
  v128_t sum = wasm_f64x2_add(a, b);
  return sum;
}
2305 // CHECK-LABEL: @test_f64x2_sub(
2306 // CHECK-NEXT:  entry:
2307 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2308 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2309 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <2 x double> [[TMP0]], [[TMP1]]
2310 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[SUB_I]] to <4 x i32>
2311 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2312 //
v128_t test_f64x2_sub(v128_t a, v128_t b) {
  // f64x2 subtraction (a - b); the checked IR is `fsub <2 x double>`.
  v128_t difference = wasm_f64x2_sub(a, b);
  return difference;
}
2317 // CHECK-LABEL: @test_f64x2_mul(
2318 // CHECK-NEXT:  entry:
2319 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2320 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2321 // CHECK-NEXT:    [[MUL_I:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]]
2322 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[MUL_I]] to <4 x i32>
2323 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2324 //
v128_t test_f64x2_mul(v128_t a, v128_t b) {
  // f64x2 multiplication; the checked IR is `fmul <2 x double>`.
  v128_t product = wasm_f64x2_mul(a, b);
  return product;
}
2329 // CHECK-LABEL: @test_f64x2_div(
2330 // CHECK-NEXT:  entry:
2331 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2332 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2333 // CHECK-NEXT:    [[DIV_I:%.*]] = fdiv <2 x double> [[TMP0]], [[TMP1]]
2334 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[DIV_I]] to <4 x i32>
2335 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2336 //
v128_t test_f64x2_div(v128_t a, v128_t b) {
  // f64x2 division (a / b); the checked IR is `fdiv <2 x double>`.
  v128_t quotient = wasm_f64x2_div(a, b);
  return quotient;
}
2341 // CHECK-LABEL: @test_f64x2_min(
2342 // CHECK-NEXT:  entry:
2343 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2344 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2345 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.minimum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR6]]
2346 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
2347 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2348 //
v128_t test_f64x2_min(v128_t a, v128_t b) {
  // f64x2 minimum; the checked IR calls llvm.minimum.v2f64.
  v128_t smaller = wasm_f64x2_min(a, b);
  return smaller;
}
2353 // CHECK-LABEL: @test_f64x2_max(
2354 // CHECK-NEXT:  entry:
2355 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2356 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2357 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR6]]
2358 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
2359 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2360 //
v128_t test_f64x2_max(v128_t a, v128_t b) {
  // f64x2 maximum; the checked IR calls llvm.maximum.v2f64.
  v128_t larger = wasm_f64x2_max(a, b);
  return larger;
}
2365 // CHECK-LABEL: @test_f64x2_pmin(
2366 // CHECK-NEXT:  entry:
2367 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2368 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2369 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
2370 // CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[CMP_I]], <2 x double> [[TMP0]], <2 x double> [[TMP1]]
2371 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
2372 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2373 //
v128_t test_f64x2_pmin(v128_t a, v128_t b) {
  // f64x2 pseudo-minimum; the checked IR compares `b < a` (fcmp olt) and
  // selects `b` when true, otherwise `a`.
  v128_t picked = wasm_f64x2_pmin(a, b);
  return picked;
}
2378 // CHECK-LABEL: @test_f64x2_pmax(
2379 // CHECK-NEXT:  entry:
2380 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2381 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
2382 // CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
2383 // CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[CMP_I]], <2 x double> [[TMP1]], <2 x double> [[TMP0]]
2384 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
2385 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2386 //
v128_t test_f64x2_pmax(v128_t a, v128_t b) {
  // f64x2 pseudo-maximum; the checked IR compares `a < b` (fcmp olt) and
  // selects `b` when true, otherwise `a`.
  v128_t picked = wasm_f64x2_pmax(a, b);
  return picked;
}
2391 // CHECK-LABEL: @test_i32x4_trunc_sat_f32x4(
2392 // CHECK-NEXT:  entry:
2393 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2394 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2395 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2396 //
v128_t test_i32x4_trunc_sat_f32x4(v128_t a) {
  // Saturating f32x4 -> signed i32x4 conversion; the checked IR calls
  // llvm.fptosi.sat.v4i32.v4f32.
  v128_t converted = wasm_i32x4_trunc_sat_f32x4(a);
  return converted;
}
2401 // CHECK-LABEL: @test_u32x4_trunc_sat_f32x4(
2402 // CHECK-NEXT:  entry:
2403 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2404 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR6]]
2405 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2406 //
v128_t test_u32x4_trunc_sat_f32x4(v128_t a) {
  // Saturating f32x4 -> unsigned i32x4 conversion; the checked IR calls
  // llvm.fptoui.sat.v4i32.v4f32.
  v128_t converted = wasm_u32x4_trunc_sat_f32x4(a);
  return converted;
}
2411 // CHECK-LABEL: @test_f32x4_convert_i32x4(
2412 // CHECK-NEXT:  entry:
2413 // CHECK-NEXT:    [[CONV_I:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
2414 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
2415 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2416 //
v128_t test_f32x4_convert_i32x4(v128_t a) {
  // Signed i32x4 -> f32x4 conversion; the checked IR is a `sitofp`.
  v128_t converted = wasm_f32x4_convert_i32x4(a);
  return converted;
}
2421 // CHECK-LABEL: @test_f32x4_convert_u32x4(
2422 // CHECK-NEXT:  entry:
2423 // CHECK-NEXT:    [[CONV_I:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float>
2424 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
2425 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2426 //
v128_t test_f32x4_convert_u32x4(v128_t a) {
  // Unsigned i32x4 -> f32x4 conversion; the checked IR is a `uitofp`.
  v128_t converted = wasm_f32x4_convert_u32x4(a);
  return converted;
}
2431 // CHECK-LABEL: @test_f64x2_convert_low_i32x4(
2432 // CHECK-NEXT:  entry:
2433 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2434 // CHECK-NEXT:    [[CONV_I:%.*]] = sitofp <2 x i32> [[VECINIT2_I]] to <2 x double>
2435 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
2436 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2437 //
v128_t test_f64x2_convert_low_i32x4(v128_t a) {
  // Converts i32 lanes 0 and 1 to f64x2; the checked IR extracts those two
  // lanes with a shufflevector and applies `sitofp`.
  v128_t converted = wasm_f64x2_convert_low_i32x4(a);
  return converted;
}
2442 // CHECK-LABEL: @test_f64x2_convert_low_u32x4(
2443 // CHECK-NEXT:  entry:
2444 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2445 // CHECK-NEXT:    [[CONV_I:%.*]] = uitofp <2 x i32> [[VECINIT2_I]] to <2 x double>
2446 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
2447 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2448 //
v128_t test_f64x2_convert_low_u32x4(v128_t a) {
  // Converts i32 lanes 0 and 1 to f64x2 as unsigned; the checked IR extracts
  // those two lanes with a shufflevector and applies `uitofp`.
  v128_t converted = wasm_f64x2_convert_low_u32x4(a);
  return converted;
}
2453 // CHECK-LABEL: @test_i32x4_trunc_sat_f64x2_zero(
2454 // CHECK-NEXT:  entry:
2455 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2456 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2457 // CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2458 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2459 //
v128_t test_i32x4_trunc_sat_f64x2_zero(v128_t a) {
  // Saturating f64x2 -> signed i32 conversion; the checked IR calls
  // llvm.fptosi.sat.v2i32.v2f64 and widens to <4 x i32> with zeroed upper lanes.
  v128_t converted = wasm_i32x4_trunc_sat_f64x2_zero(a);
  return converted;
}
2464 // CHECK-LABEL: @test_u32x4_trunc_sat_f64x2_zero(
2465 // CHECK-NEXT:  entry:
2466 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2467 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR6]]
2468 // CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2469 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2470 //
v128_t test_u32x4_trunc_sat_f64x2_zero(v128_t a) {
  // Saturating f64x2 -> unsigned i32 conversion; the checked IR calls
  // llvm.fptoui.sat.v2i32.v2f64 and widens to <4 x i32> with zeroed upper lanes.
  v128_t converted = wasm_u32x4_trunc_sat_f64x2_zero(a);
  return converted;
}
2475 // CHECK-LABEL: @test_f32x4_demote_f64x2_zero(
2476 // CHECK-NEXT:  entry:
2477 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2478 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2479 // CHECK-NEXT:    [[CONV_I:%.*]] = fptrunc <4 x double> [[SHUFFLE_I]] to <4 x float>
2480 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
2481 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2482 //
v128_t test_f32x4_demote_f64x2_zero(v128_t a) {
  // f64x2 -> f32x4 demotion; the checked IR pads the two doubles with zeros
  // to <4 x double> and then applies `fptrunc` to <4 x float>.
  v128_t demoted = wasm_f32x4_demote_f64x2_zero(a);
  return demoted;
}
2487 // CHECK-LABEL: @test_f64x2_promote_low_f32x4(
2488 // CHECK-NEXT:  entry:
2489 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2490 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
2491 // CHECK-NEXT:    [[CONV_I:%.*]] = fpext <2 x float> [[VECINIT2_I]] to <2 x double>
2492 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
2493 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2494 //
v128_t test_f64x2_promote_low_f32x4(v128_t a) {
  // Promotes f32 lanes 0 and 1 to f64x2; the checked IR extracts those lanes
  // with a shufflevector and applies `fpext`.
  v128_t promoted = wasm_f64x2_promote_low_f32x4(a);
  return promoted;
}
2499 // CHECK-LABEL: @test_i8x16_shuffle(
2500 // CHECK-NEXT:  entry:
2501 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2502 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2503 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0)
2504 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2505 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2506 //
v128_t test_i8x16_shuffle(v128_t a, v128_t b) {
  // Byte shuffle with lane indices 15 down to 0; the checked IR passes the
  // same sixteen indices to llvm.wasm.shuffle.
  v128_t shuffled =
      wasm_i8x16_shuffle(a, b, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
  return shuffled;
}
2511 // CHECK-LABEL: @test_i16x8_shuffle(
2512 // CHECK-NEXT:  entry:
2513 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2514 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2515 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1)
2516 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2517 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2518 //
v128_t test_i16x8_shuffle(v128_t a, v128_t b) {
  // 16-bit lane shuffle with indices 7 down to 0; the checked IR expands each
  // index into its byte pair (14,15, 12,13, ...) for llvm.wasm.shuffle.
  v128_t shuffled = wasm_i16x8_shuffle(a, b, 7, 6, 5, 4, 3, 2, 1, 0);
  return shuffled;
}
2523 // CHECK-LABEL: @test_i32x4_shuffle(
2524 // CHECK-NEXT:  entry:
2525 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2526 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2527 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3)
2528 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2529 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2530 //
v128_t test_i32x4_shuffle(v128_t a, v128_t b) {
  // 32-bit lane shuffle with indices 3 down to 0; the checked IR expands each
  // index into four byte indices (12..15, 8..11, ...) for llvm.wasm.shuffle.
  v128_t shuffled = wasm_i32x4_shuffle(a, b, 3, 2, 1, 0);
  return shuffled;
}
2535 // CHECK-LABEL: @test_i64x2_shuffle(
2536 // CHECK-NEXT:  entry:
2537 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2538 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2539 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
2540 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2541 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2542 //
v128_t test_i64x2_shuffle(v128_t a, v128_t b) {
  // 64-bit lane shuffle with indices 1, 0; the checked IR expands them into
  // byte indices 8..15 followed by 0..7 for llvm.wasm.shuffle.
  v128_t shuffled = wasm_i64x2_shuffle(a, b, 1, 0);
  return shuffled;
}
2547 // CHECK-LABEL: @test_i8x16_swizzle(
2548 // CHECK-NEXT:  entry:
2549 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2550 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2551 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
2552 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2553 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2554 //
v128_t test_i8x16_swizzle(v128_t a, v128_t b) {
  // Byte swizzle of `a` by `b`; the checked IR calls llvm.wasm.swizzle on
  // both operands viewed as <16 x i8>.
  v128_t swizzled = wasm_i8x16_swizzle(a, b);
  return swizzled;
}
2559 // CHECK-LABEL: @test_i8x16_narrow_i16x8(
2560 // CHECK-NEXT:  entry:
2561 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2562 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2563 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
2564 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2565 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2566 //
v128_t test_i8x16_narrow_i16x8(v128_t a, v128_t b) {
  // Signed narrowing of two i16x8 vectors into i8x16; the checked IR calls
  // llvm.wasm.narrow.signed.v16i8.v8i16.
  v128_t narrowed = wasm_i8x16_narrow_i16x8(a, b);
  return narrowed;
}
2571 // CHECK-LABEL: @test_u8x16_narrow_i16x8(
2572 // CHECK-NEXT:  entry:
2573 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2574 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2575 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.unsigned.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
2576 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2577 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2578 //
v128_t test_u8x16_narrow_i16x8(v128_t a, v128_t b) {
  // Unsigned narrowing of two i16x8 vectors into i8x16; the checked IR calls
  // llvm.wasm.narrow.unsigned.v16i8.v8i16.
  v128_t narrowed = wasm_u8x16_narrow_i16x8(a, b);
  return narrowed;
}
2583 // CHECK-LABEL: @test_i16x8_narrow_i32x4(
2584 // CHECK-NEXT:  entry:
2585 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR6]]
2586 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
2587 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2588 //
v128_t test_i16x8_narrow_i32x4(v128_t a, v128_t b) {
  // Signed narrowing of two i32x4 vectors into i16x8; the checked IR calls
  // llvm.wasm.narrow.signed.v8i16.v4i32.
  v128_t narrowed = wasm_i16x8_narrow_i32x4(a, b);
  return narrowed;
}
2593 // CHECK-LABEL: @test_u16x8_narrow_i32x4(
2594 // CHECK-NEXT:  entry:
2595 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR6]]
2596 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
2597 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2598 //
v128_t test_u16x8_narrow_i32x4(v128_t a, v128_t b) {
  // Unsigned narrowing of two i32x4 vectors into i16x8; the checked IR calls
  // llvm.wasm.narrow.unsigned.v8i16.v4i32.
  v128_t narrowed = wasm_u16x8_narrow_i32x4(a, b);
  return narrowed;
}
2603 // CHECK-LABEL: @test_i16x8_extend_low_i8x16(
2604 // CHECK-NEXT:  entry:
2605 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2606 // CHECK-NEXT:    [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2607 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16>
2608 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
2609 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2610 //
v128_t test_i16x8_extend_low_i8x16(v128_t a) {
  // Sign-extends byte lanes 0..7 to i16x8; the checked IR is a shufflevector
  // of those lanes followed by `sext`.
  v128_t widened = wasm_i16x8_extend_low_i8x16(a);
  return widened;
}
2615 // CHECK-LABEL: @test_i16x8_extend_high_i8x16(
2616 // CHECK-NEXT:  entry:
2617 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2618 // CHECK-NEXT:    [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2619 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16>
2620 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
2621 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2622 //
v128_t test_i16x8_extend_high_i8x16(v128_t a) {
  // Sign-extends byte lanes 8..15 to i16x8; the checked IR is a shufflevector
  // of those lanes followed by `sext`.
  v128_t widened = wasm_i16x8_extend_high_i8x16(a);
  return widened;
}
2627 // CHECK-LABEL: @test_u16x8_extend_low_u8x16(
2628 // CHECK-NEXT:  entry:
2629 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2630 // CHECK-NEXT:    [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2631 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16>
2632 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
2633 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2634 //
v128_t test_u16x8_extend_low_u8x16(v128_t a) {
  // Zero-extends byte lanes 0..7 to i16x8; the checked IR is a shufflevector
  // of those lanes followed by `zext`.
  v128_t widened = wasm_u16x8_extend_low_u8x16(a);
  return widened;
}
2639 // CHECK-LABEL: @test_u16x8_extend_high_u8x16(
2640 // CHECK-NEXT:  entry:
2641 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2642 // CHECK-NEXT:    [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2643 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16>
2644 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
2645 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2646 //
v128_t test_u16x8_extend_high_u8x16(v128_t a) {
  // Zero-extends byte lanes 8..15 to i16x8; the checked IR is a shufflevector
  // of those lanes followed by `zext`.
  v128_t widened = wasm_u16x8_extend_high_u8x16(a);
  return widened;
}
2651 // CHECK-LABEL: @test_i32x4_extend_low_i16x8(
2652 // CHECK-NEXT:  entry:
2653 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2654 // CHECK-NEXT:    [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2655 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32>
2656 // CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
2657 //
v128_t test_i32x4_extend_low_i16x8(v128_t a) {
  // Codegen probe for wasm_i32x4_extend_low_i16x8. Expected IR: lanes 0..3 of
  // the <8 x i16> view are sign-extended to <4 x i32>.
  v128_t widened = wasm_i32x4_extend_low_i16x8(a);
  return widened;
}
2662 // CHECK-LABEL: @test_i32x4_extend_high_i16x8(
2663 // CHECK-NEXT:  entry:
2664 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2665 // CHECK-NEXT:    [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2666 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32>
2667 // CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
2668 //
v128_t test_i32x4_extend_high_i16x8(v128_t a) {
  // Codegen probe for wasm_i32x4_extend_high_i16x8. Expected IR: lanes 4..7 of
  // the <8 x i16> view are sign-extended to <4 x i32>.
  v128_t widened = wasm_i32x4_extend_high_i16x8(a);
  return widened;
}
2673 // CHECK-LABEL: @test_u32x4_extend_low_u16x8(
2674 // CHECK-NEXT:  entry:
2675 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2676 // CHECK-NEXT:    [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2677 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32>
2678 // CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
2679 //
v128_t test_u32x4_extend_low_u16x8(v128_t a) {
  // Codegen probe for wasm_u32x4_extend_low_u16x8. Expected IR: lanes 0..3 of
  // the <8 x i16> view are zero-extended to <4 x i32>.
  v128_t widened = wasm_u32x4_extend_low_u16x8(a);
  return widened;
}
2684 // CHECK-LABEL: @test_u32x4_extend_high_u16x8(
2685 // CHECK-NEXT:  entry:
2686 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2687 // CHECK-NEXT:    [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2688 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32>
2689 // CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
2690 //
v128_t test_u32x4_extend_high_u16x8(v128_t a) {
  // Codegen probe for wasm_u32x4_extend_high_u16x8. Expected IR: lanes 4..7 of
  // the <8 x i16> view are zero-extended to <4 x i32>.
  v128_t widened = wasm_u32x4_extend_high_u16x8(a);
  return widened;
}
2695 // CHECK-LABEL: @test_i64x2_extend_low_i32x4(
2696 // CHECK-NEXT:  entry:
2697 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2698 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64>
2699 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
2700 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2701 //
v128_t test_i64x2_extend_low_i32x4(v128_t a) {
  // Codegen probe for wasm_i64x2_extend_low_i32x4. Expected IR: lanes 0 and 1
  // of the <4 x i32> input are sign-extended to <2 x i64>.
  v128_t widened = wasm_i64x2_extend_low_i32x4(a);
  return widened;
}
2706 // CHECK-LABEL: @test_i64x2_extend_high_i32x4(
2707 // CHECK-NEXT:  entry:
2708 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2709 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64>
2710 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
2711 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2712 //
v128_t test_i64x2_extend_high_i32x4(v128_t a) {
  // Codegen probe for wasm_i64x2_extend_high_i32x4. Expected IR: lanes 2 and 3
  // of the <4 x i32> input are sign-extended to <2 x i64>.
  v128_t widened = wasm_i64x2_extend_high_i32x4(a);
  return widened;
}
2717 // CHECK-LABEL: @test_u64x2_extend_low_u32x4(
2718 // CHECK-NEXT:  entry:
2719 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2720 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64>
2721 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
2722 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2723 //
v128_t test_u64x2_extend_low_u32x4(v128_t a) {
  // Codegen probe for wasm_u64x2_extend_low_u32x4. Expected IR: lanes 0 and 1
  // of the <4 x i32> input are zero-extended to <2 x i64>.
  v128_t widened = wasm_u64x2_extend_low_u32x4(a);
  return widened;
}
2728 // CHECK-LABEL: @test_u64x2_extend_high_u32x4(
2729 // CHECK-NEXT:  entry:
2730 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2731 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64>
2732 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
2733 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2734 //
v128_t test_u64x2_extend_high_u32x4(v128_t a) {
  // Codegen probe for wasm_u64x2_extend_high_u32x4. Expected IR: lanes 2 and 3
  // of the <4 x i32> input are zero-extended to <2 x i64>.
  v128_t widened = wasm_u64x2_extend_high_u32x4(a);
  return widened;
}
2739 // CHECK-LABEL: @test_i16x8_extadd_pairwise_i8x16(
2740 // CHECK-NEXT:  entry:
2741 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2742 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(<16 x i8> [[TMP0]]) #[[ATTR6]]
2743 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
2744 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2745 //
v128_t test_i16x8_extadd_pairwise_i8x16(v128_t a) {
  // Codegen probe for wasm_i16x8_extadd_pairwise_i8x16. Expected IR: a single
  // call to @llvm.wasm.extadd.pairwise.signed.v8i16 on the <16 x i8> view.
  v128_t summed = wasm_i16x8_extadd_pairwise_i8x16(a);
  return summed;
}
2750 // CHECK-LABEL: @test_u16x8_extadd_pairwise_u8x16(
2751 // CHECK-NEXT:  entry:
2752 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2753 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.unsigned.v8i16(<16 x i8> [[TMP0]]) #[[ATTR6]]
2754 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
2755 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2756 //
v128_t test_u16x8_extadd_pairwise_u8x16(v128_t a) {
  // Codegen probe for wasm_u16x8_extadd_pairwise_u8x16. Expected IR: a single
  // call to @llvm.wasm.extadd.pairwise.unsigned.v8i16 on the <16 x i8> view.
  v128_t summed = wasm_u16x8_extadd_pairwise_u8x16(a);
  return summed;
}
2761 // CHECK-LABEL: @test_i32x4_extadd_pairwise_i16x8(
2762 // CHECK-NEXT:  entry:
2763 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2764 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> [[TMP0]]) #[[ATTR6]]
2765 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2766 //
v128_t test_i32x4_extadd_pairwise_i16x8(v128_t a) {
  // Codegen probe for wasm_i32x4_extadd_pairwise_i16x8. Expected IR: a single
  // call to @llvm.wasm.extadd.pairwise.signed.v4i32 on the <8 x i16> view.
  v128_t summed = wasm_i32x4_extadd_pairwise_i16x8(a);
  return summed;
}
2771 // CHECK-LABEL: @test_u32x4_extadd_pairwise_u16x8(
2772 // CHECK-NEXT:  entry:
2773 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2774 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.unsigned.v4i32(<8 x i16> [[TMP0]]) #[[ATTR6]]
2775 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2776 //
v128_t test_u32x4_extadd_pairwise_u16x8(v128_t a) {
  // Codegen probe for wasm_u32x4_extadd_pairwise_u16x8. Expected IR: a single
  // call to @llvm.wasm.extadd.pairwise.unsigned.v4i32 on the <8 x i16> view.
  v128_t summed = wasm_u32x4_extadd_pairwise_u16x8(a);
  return summed;
}
2781 // CHECK-LABEL: @test_i16x8_extmul_low_i8x16(
2782 // CHECK-NEXT:  entry:
2783 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2784 // CHECK-NEXT:    [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2785 // CHECK-NEXT:    [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
2786 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2787 // CHECK-NEXT:    [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2788 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
2789 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
2790 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
2791 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2792 //
v128_t test_i16x8_extmul_low_i8x16(v128_t a, v128_t b) {
  // Codegen probe for wasm_i16x8_extmul_low_i8x16. Expected IR: lanes 0..7 of
  // both <16 x i8> views are sign-extended to <8 x i16>, then multiplied with
  // the nsw flag.
  v128_t product = wasm_i16x8_extmul_low_i8x16(a, b);
  return product;
}
2797 // CHECK-LABEL: @test_i16x8_extmul_high_i8x16(
2798 // CHECK-NEXT:  entry:
2799 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2800 // CHECK-NEXT:    [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2801 // CHECK-NEXT:    [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
2802 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2803 // CHECK-NEXT:    [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2804 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
2805 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
2806 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
2807 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2808 //
v128_t test_i16x8_extmul_high_i8x16(v128_t a, v128_t b) {
  // Codegen probe for wasm_i16x8_extmul_high_i8x16. Expected IR: lanes 8..15
  // of both <16 x i8> views are sign-extended to <8 x i16>, then multiplied
  // with the nsw flag.
  v128_t product = wasm_i16x8_extmul_high_i8x16(a, b);
  return product;
}
2813 // CHECK-LABEL: @test_u16x8_extmul_low_u8x16(
2814 // CHECK-NEXT:  entry:
2815 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2816 // CHECK-NEXT:    [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2817 // CHECK-NEXT:    [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
2818 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2819 // CHECK-NEXT:    [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2820 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
2821 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
2822 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
2823 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2824 //
v128_t test_u16x8_extmul_low_u8x16(v128_t a, v128_t b) {
  // Codegen probe for wasm_u16x8_extmul_low_u8x16. Expected IR: lanes 0..7 of
  // both <16 x i8> views are zero-extended to <8 x i16>, then multiplied with
  // the nuw flag.
  v128_t product = wasm_u16x8_extmul_low_u8x16(a, b);
  return product;
}
2829 // CHECK-LABEL: @test_u16x8_extmul_high_u8x16(
2830 // CHECK-NEXT:  entry:
2831 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2832 // CHECK-NEXT:    [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2833 // CHECK-NEXT:    [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
2834 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2835 // CHECK-NEXT:    [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2836 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
2837 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
2838 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
2839 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2840 //
v128_t test_u16x8_extmul_high_u8x16(v128_t a, v128_t b) {
  // Codegen probe for wasm_u16x8_extmul_high_u8x16. Expected IR: lanes 8..15
  // of both <16 x i8> views are zero-extended to <8 x i16>, then multiplied
  // with the nuw flag.
  v128_t product = wasm_u16x8_extmul_high_u8x16(a, b);
  return product;
}
2845 // CHECK-LABEL: @test_i32x4_extmul_low_i16x8(
2846 // CHECK-NEXT:  entry:
2847 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2848 // CHECK-NEXT:    [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2849 // CHECK-NEXT:    [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
2850 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2851 // CHECK-NEXT:    [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2852 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
2853 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
2854 // CHECK-NEXT:    ret <4 x i32> [[MUL_I]]
2855 //
v128_t test_i32x4_extmul_low_i16x8(v128_t a, v128_t b) {
  // Codegen probe for wasm_i32x4_extmul_low_i16x8. Expected IR: lanes 0..3 of
  // both <8 x i16> views are sign-extended to <4 x i32>, then multiplied with
  // the nsw flag.
  v128_t product = wasm_i32x4_extmul_low_i16x8(a, b);
  return product;
}
2860 // CHECK-LABEL: @test_i32x4_extmul_high_i16x8(
2861 // CHECK-NEXT:  entry:
2862 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2863 // CHECK-NEXT:    [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2864 // CHECK-NEXT:    [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
2865 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2866 // CHECK-NEXT:    [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2867 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
2868 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
2869 // CHECK-NEXT:    ret <4 x i32> [[MUL_I]]
2870 //
v128_t test_i32x4_extmul_high_i16x8(v128_t a, v128_t b) {
  // Codegen probe for wasm_i32x4_extmul_high_i16x8. Expected IR: lanes 4..7 of
  // both <8 x i16> views are sign-extended to <4 x i32>, then multiplied with
  // the nsw flag.
  v128_t product = wasm_i32x4_extmul_high_i16x8(a, b);
  return product;
}
2875 // CHECK-LABEL: @test_u32x4_extmul_low_u16x8(
2876 // CHECK-NEXT:  entry:
2877 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2878 // CHECK-NEXT:    [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2879 // CHECK-NEXT:    [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
2880 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2881 // CHECK-NEXT:    [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2882 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
2883 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
2884 // CHECK-NEXT:    ret <4 x i32> [[MUL_I]]
2885 //
v128_t test_u32x4_extmul_low_u16x8(v128_t a, v128_t b) {
  // Codegen probe for wasm_u32x4_extmul_low_u16x8. Expected IR: lanes 0..3 of
  // both <8 x i16> views are zero-extended to <4 x i32>, then multiplied with
  // the nuw flag.
  v128_t product = wasm_u32x4_extmul_low_u16x8(a, b);
  return product;
}
2890 // CHECK-LABEL: @test_u32x4_extmul_high_u16x8(
2891 // CHECK-NEXT:  entry:
2892 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2893 // CHECK-NEXT:    [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2894 // CHECK-NEXT:    [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
2895 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2896 // CHECK-NEXT:    [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2897 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
2898 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
2899 // CHECK-NEXT:    ret <4 x i32> [[MUL_I]]
2900 //
v128_t test_u32x4_extmul_high_u16x8(v128_t a, v128_t b) {
  // Codegen probe for wasm_u32x4_extmul_high_u16x8. Expected IR: lanes 4..7 of
  // both <8 x i16> views are zero-extended to <4 x i32>, then multiplied with
  // the nuw flag.
  v128_t product = wasm_u32x4_extmul_high_u16x8(a, b);
  return product;
}
2905 // CHECK-LABEL: @test_i64x2_extmul_low_i32x4(
2906 // CHECK-NEXT:  entry:
2907 // CHECK-NEXT:    [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2908 // CHECK-NEXT:    [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
2909 // CHECK-NEXT:    [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2910 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
2911 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
2912 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
2913 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2914 //
v128_t test_i64x2_extmul_low_i32x4(v128_t a, v128_t b) {
  // Codegen probe for wasm_i64x2_extmul_low_i32x4. Expected IR: lanes 0 and 1
  // of both <4 x i32> inputs are sign-extended to <2 x i64>, then multiplied
  // with the nsw flag.
  v128_t product = wasm_i64x2_extmul_low_i32x4(a, b);
  return product;
}
2919 // CHECK-LABEL: @test_i64x2_extmul_high_i32x4(
2920 // CHECK-NEXT:  entry:
2921 // CHECK-NEXT:    [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2922 // CHECK-NEXT:    [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
2923 // CHECK-NEXT:    [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2924 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
2925 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
2926 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
2927 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2928 //
v128_t test_i64x2_extmul_high_i32x4(v128_t a, v128_t b) {
  // Codegen probe for wasm_i64x2_extmul_high_i32x4. Expected IR: lanes 2 and 3
  // of both <4 x i32> inputs are sign-extended to <2 x i64>, then multiplied
  // with the nsw flag.
  v128_t product = wasm_i64x2_extmul_high_i32x4(a, b);
  return product;
}
2933 // CHECK-LABEL: @test_u64x2_extmul_low_u32x4(
2934 // CHECK-NEXT:  entry:
2935 // CHECK-NEXT:    [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2936 // CHECK-NEXT:    [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
2937 // CHECK-NEXT:    [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2938 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
2939 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
2940 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
2941 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2942 //
v128_t test_u64x2_extmul_low_u32x4(v128_t a, v128_t b) {
  // Codegen probe for wasm_u64x2_extmul_low_u32x4. Expected IR: lanes 0 and 1
  // of both <4 x i32> inputs are zero-extended to <2 x i64>, then multiplied
  // with the nuw flag.
  v128_t product = wasm_u64x2_extmul_low_u32x4(a, b);
  return product;
}
2947 // CHECK-LABEL: @test_u64x2_extmul_high_u32x4(
2948 // CHECK-NEXT:  entry:
2949 // CHECK-NEXT:    [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2950 // CHECK-NEXT:    [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
2951 // CHECK-NEXT:    [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2952 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
2953 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
2954 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
2955 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2956 //
v128_t test_u64x2_extmul_high_u32x4(v128_t a, v128_t b) {
  // Codegen probe for wasm_u64x2_extmul_high_u32x4. Expected IR: lanes 2 and 3
  // of both <4 x i32> inputs are zero-extended to <2 x i64>, then multiplied
  // with the nuw flag.
  v128_t product = wasm_u64x2_extmul_high_u32x4(a, b);
  return product;
}
2961 // CHECK-LABEL: @test_i16x8_q15mulr_sat(
2962 // CHECK-NEXT:  entry:
2963 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2964 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2965 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.q15mulr.sat.signed(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
2966 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
2967 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2968 //
v128_t test_i16x8_q15mulr_sat(v128_t a, v128_t b) {
  // Codegen probe for wasm_i16x8_q15mulr_sat. Expected IR: both inputs are
  // viewed as <8 x i16> and passed to @llvm.wasm.q15mulr.sat.signed.
  v128_t rounded = wasm_i16x8_q15mulr_sat(a, b);
  return rounded;
}