// RUN: %clang_cc1 -triple arm64-apple-darwin -target-feature +neon \
// RUN:   -fallow-half-arguments-and-returns -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg | FileCheck %s

#include <arm_neon.h>

// CHECK-LABEL: define i8 @test_vget_lane_u8(<8 x i8> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
uint8_t test_vget_lane_u8(uint8x8_t a) {
  return vget_lane_u8(a, 7);
}

// CHECK-LABEL: define i16 @test_vget_lane_u16(<4 x i16> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK:   ret i16 [[VGET_LANE]]
uint16_t test_vget_lane_u16(uint16x4_t a) {
  return vget_lane_u16(a, 3);
}

// CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK:   ret i32 [[VGET_LANE]]
uint32_t test_vget_lane_u32(uint32x2_t a) {
  return vget_lane_u32(a, 1);
}

// CHECK-LABEL: define i8 @test_vget_lane_s8(<8 x i8> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
int8_t test_vget_lane_s8(int8x8_t a) {
  return vget_lane_s8(a, 7);
}

// CHECK-LABEL: define i16 @test_vget_lane_s16(<4 x i16> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK:   ret i16 [[VGET_LANE]]
int16_t test_vget_lane_s16(int16x4_t a) {
  return vget_lane_s16(a, 3);
}

// CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK:   ret i32 [[VGET_LANE]]
int32_t test_vget_lane_s32(int32x2_t a) {
  return vget_lane_s32(a, 1);
}

// CHECK-LABEL: define i8 @test_vget_lane_p8(<8 x i8> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
poly8_t test_vget_lane_p8(poly8x8_t a) {
  return vget_lane_p8(a, 7);
}

// CHECK-LABEL: define i16 @test_vget_lane_p16(<4 x i16> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK:   ret i16 [[VGET_LANE]]
poly16_t test_vget_lane_p16(poly16x4_t a) {
  return vget_lane_p16(a, 3);
}

// CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> %a, i32 1
// CHECK:   ret float [[VGET_LANE]]
float32_t test_vget_lane_f32(float32x2_t a) {
  return vget_lane_f32(a, 1);
}

// CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 {
// CHECK:   [[__REINT_242:%.*]] = alloca <4 x half>, align 8
// CHECK:   [[__REINT1_242:%.*]] = alloca i16, align 2
// CHECK:   store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
// CHECK:   [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 1
// CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
// CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
// CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK:   ret float [[CONV]]
float32_t test_vget_lane_f16(float16x4_t a) {
  return vget_lane_f16(a, 1);
}

// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #1 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGETQ_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
  return vgetq_lane_u8(a, 15);
}

// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #1 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK:   ret i16 [[VGETQ_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
  return vgetq_lane_u16(a, 7);
}

// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #1 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK:   ret i32 [[VGETQ_LANE]]
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
  return vgetq_lane_u32(a, 3);
}

// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #1 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGETQ_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
  return vgetq_lane_s8(a, 15);
}

// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #1 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK:   ret i16 [[VGETQ_LANE]]
int16_t test_vgetq_lane_s16(int16x8_t a) {
  return vgetq_lane_s16(a, 7);
}

// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #1 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK:   ret i32 [[VGETQ_LANE]]
int32_t test_vgetq_lane_s32(int32x4_t a) {
  return vgetq_lane_s32(a, 3);
}

// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #1 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGETQ_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
  return vgetq_lane_p8(a, 15);
}

// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #1 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK:   ret i16 [[VGETQ_LANE]]
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
  return vgetq_lane_p16(a, 7);
}

// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #1 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> %a, i32 3
// CHECK:   ret float [[VGETQ_LANE]]
float32_t test_vgetq_lane_f32(float32x4_t a) {
  return vgetq_lane_f32(a, 3);
}

// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #1 {
// CHECK:   [[__REINT_244:%.*]] = alloca <8 x half>, align 16
// CHECK:   [[__REINT1_244:%.*]] = alloca i16, align 2
// CHECK:   store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
// CHECK:   [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3
// CHECK:   store i16 [[VGETQ_LANE]], i16* [[__REINT1_244]], align 2
// CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
// CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK:   ret float [[CONV]]
float32_t test_vgetq_lane_f16(float16x8_t a) {
  return vgetq_lane_f16(a, 3);
}

// CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK:   ret i64 [[VGET_LANE]]
int64_t test_vget_lane_s64(int64x1_t a) {
  return vget_lane_s64(a, 0);
}

// CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK:   ret i64 [[VGET_LANE]]
uint64_t test_vget_lane_u64(uint64x1_t a) {
  return vget_lane_u64(a, 0);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #1 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK:   ret i64 [[VGETQ_LANE]]
int64_t test_vgetq_lane_s64(int64x2_t a) {
  return vgetq_lane_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #1 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK:   ret i64 [[VGETQ_LANE]]
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
  return vgetq_lane_u64(a, 1);
}


// CHECK-LABEL: define <8 x i8> @test_vset_lane_u8(i8 %a, <8 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
  return vset_lane_u8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 %a, <4 x i16> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
  return vset_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VSET_LANE]]
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
  return vset_lane_u32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vset_lane_s8(i8 %a, <8 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
  return vset_lane_s8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 %a, <4 x i16> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
  return vset_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VSET_LANE]]
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
  return vset_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vset_lane_p8(i8 %a, <8 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
  return vset_lane_p8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 %a, <4 x i16> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
  return vset_lane_p16(a, b, 3);
}

// CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x float> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a, i32 1
// CHECK:   ret <2 x float> [[VSET_LANE]]
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
  return vset_lane_f32(a, b, 1);
}

// CHECK-LABEL: define <4 x half> @test_vset_lane_f16(half* %a, <4 x half> %b) #0 {
// CHECK:   [[__REINT_246:%.*]] = alloca half, align 2
// CHECK:   [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
// CHECK:   [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   store half [[TMP0]], half* [[__REINT_246]], align 2
// CHECK:   store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP2]], i32 3
// CHECK:   store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
// CHECK:   ret <4 x half> [[TMP8]]
float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
  return vset_lane_f16(*a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #1 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
  return vsetq_lane_u8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #1 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
  return vsetq_lane_u16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #1 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
  return vsetq_lane_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #1 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
  return vsetq_lane_s8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #1 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
  return vsetq_lane_s16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #1 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  return vsetq_lane_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #1 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
  return vsetq_lane_p8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #1 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
  return vsetq_lane_p16(a, b, 7);
}

// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #1 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a, i32 3
// CHECK:   ret <4 x float> [[VSET_LANE]]
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
  return vsetq_lane_f32(a, b, 3);
}

// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #1 {
// CHECK:   [[__REINT_248:%.*]] = alloca half, align 2
// CHECK:   [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK:   [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   store half [[TMP0]], half* [[__REINT_248]], align 2
// CHECK:   store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP2]], i32 7
// CHECK:   store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
// CHECK:   ret <8 x half> [[TMP8]]
float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
  return vsetq_lane_f16(*a, b, 7);
}

// CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
// CHECK:   ret <1 x i64> [[VSET_LANE]]
int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
  return vset_lane_s64(a, b, 0);
}

// CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
// CHECK:   ret <1 x i64> [[VSET_LANE]]
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
  return vset_lane_u64(a, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #1 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
// CHECK:   ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
  return vsetq_lane_s64(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #1 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
// CHECK:   ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
  return vsetq_lane_u64(a, b, 1);
}

// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"