/* ARM NEON intrinsics include file.

   Copyright (C) 2011-2016 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _AARCH64_NEON_H_
#define _AARCH64_NEON_H_

#pragma GCC push_options
#pragma GCC target ("+nothing+simd")

#include <stdint.h>

#define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
#define __AARCH64_INT64_C(__C) ((int64_t) __C)

typedef __Int8x8_t int8x8_t;
typedef __Int16x4_t int16x4_t;
typedef __Int32x2_t int32x2_t;
typedef __Int64x1_t int64x1_t;
typedef __Float16x4_t float16x4_t;
typedef __Float32x2_t float32x2_t;
typedef __Poly8x8_t poly8x8_t;
typedef __Poly16x4_t poly16x4_t;
typedef __Uint8x8_t uint8x8_t;
typedef __Uint16x4_t uint16x4_t;
typedef __Uint32x2_t uint32x2_t;
typedef __Float64x1_t float64x1_t;
typedef __Uint64x1_t uint64x1_t;
typedef __Int8x16_t int8x16_t;
typedef __Int16x8_t int16x8_t;
typedef __Int32x4_t int32x4_t;
typedef __Int64x2_t int64x2_t;
typedef __Float16x8_t float16x8_t;
typedef __Float32x4_t float32x4_t;
typedef __Float64x2_t float64x2_t;
typedef __Poly8x16_t poly8x16_t;
typedef __Poly16x8_t poly16x8_t;
typedef __Poly64x2_t poly64x2_t;
typedef __Uint8x16_t uint8x16_t;
typedef __Uint16x8_t uint16x8_t;
typedef __Uint32x4_t uint32x4_t;
typedef __Uint64x2_t uint64x2_t;

typedef __Poly8_t poly8_t;
typedef __Poly16_t poly16_t;
typedef __Poly64_t poly64_t;
typedef __Poly128_t poly128_t;

typedef __fp16 float16_t;
typedef float float32_t;
typedef double float64_t;

typedef struct int8x8x2_t
{
  int8x8_t val[2];
} int8x8x2_t;

typedef struct int8x16x2_t
{
  int8x16_t val[2];
} int8x16x2_t;

typedef struct int16x4x2_t
{
  int16x4_t val[2];
} int16x4x2_t;

typedef struct int16x8x2_t
{
  int16x8_t val[2];
} int16x8x2_t;

typedef struct int32x2x2_t
{
  int32x2_t val[2];
} int32x2x2_t;

typedef struct int32x4x2_t
{
  int32x4_t val[2];
} int32x4x2_t;

typedef struct int64x1x2_t
{
  int64x1_t val[2];
} int64x1x2_t;

typedef struct int64x2x2_t
{
  int64x2_t val[2];
} int64x2x2_t;

typedef struct uint8x8x2_t
{
  uint8x8_t val[2];
} uint8x8x2_t;

typedef struct uint8x16x2_t
{
  uint8x16_t val[2];
} uint8x16x2_t;

typedef struct uint16x4x2_t
{
  uint16x4_t val[2];
} uint16x4x2_t;

typedef struct uint16x8x2_t
{
  uint16x8_t val[2];
} uint16x8x2_t;

typedef struct uint32x2x2_t
{
  uint32x2_t val[2];
} uint32x2x2_t;

typedef struct uint32x4x2_t
{
  uint32x4_t val[2];
} uint32x4x2_t;

typedef struct uint64x1x2_t
{
  uint64x1_t val[2];
} uint64x1x2_t;

typedef struct uint64x2x2_t
{
  uint64x2_t val[2];
} uint64x2x2_t;

typedef struct float16x4x2_t
{
  float16x4_t val[2];
} float16x4x2_t;

typedef struct float16x8x2_t
{
  float16x8_t val[2];
} float16x8x2_t;

typedef struct float32x2x2_t
{
  float32x2_t val[2];
} float32x2x2_t;

typedef struct float32x4x2_t
{
  float32x4_t val[2];
} float32x4x2_t;

typedef struct float64x2x2_t
{
  float64x2_t val[2];
} float64x2x2_t;

typedef struct float64x1x2_t
{
  float64x1_t val[2];
} float64x1x2_t;

typedef struct poly8x8x2_t
{
  poly8x8_t val[2];
} poly8x8x2_t;

typedef struct poly8x16x2_t
{
  poly8x16_t val[2];
} poly8x16x2_t;

typedef struct poly16x4x2_t
{
  poly16x4_t val[2];
} poly16x4x2_t;

typedef struct poly16x8x2_t
{
  poly16x8_t val[2];
} poly16x8x2_t;

typedef struct int8x8x3_t
{
  int8x8_t val[3];
} int8x8x3_t;

typedef struct int8x16x3_t
{
  int8x16_t val[3];
} int8x16x3_t;

typedef struct int16x4x3_t
{
  int16x4_t val[3];
} int16x4x3_t;

typedef struct int16x8x3_t
{
  int16x8_t val[3];
} int16x8x3_t;

typedef struct int32x2x3_t
{
  int32x2_t val[3];
} int32x2x3_t;

typedef struct int32x4x3_t
{
  int32x4_t val[3];
} int32x4x3_t;

typedef struct int64x1x3_t
{
  int64x1_t val[3];
} int64x1x3_t;

typedef struct int64x2x3_t
{
  int64x2_t val[3];
} int64x2x3_t;

typedef struct uint8x8x3_t
{
  uint8x8_t val[3];
} uint8x8x3_t;

typedef struct uint8x16x3_t
{
  uint8x16_t val[3];
} uint8x16x3_t;

typedef struct uint16x4x3_t
{
  uint16x4_t val[3];
} uint16x4x3_t;

typedef struct uint16x8x3_t
{
  uint16x8_t val[3];
} uint16x8x3_t;

typedef struct uint32x2x3_t
{
  uint32x2_t val[3];
} uint32x2x3_t;

typedef struct uint32x4x3_t
{
  uint32x4_t val[3];
} uint32x4x3_t;

typedef struct uint64x1x3_t
{
  uint64x1_t val[3];
} uint64x1x3_t;

typedef struct uint64x2x3_t
{
  uint64x2_t val[3];
} uint64x2x3_t;

typedef struct float16x4x3_t
{
  float16x4_t val[3];
} float16x4x3_t;

typedef struct float16x8x3_t
{
  float16x8_t val[3];
} float16x8x3_t;

typedef struct float32x2x3_t
{
  float32x2_t val[3];
} float32x2x3_t;

typedef struct float32x4x3_t
{
  float32x4_t val[3];
} float32x4x3_t;

typedef struct float64x2x3_t
{
  float64x2_t val[3];
} float64x2x3_t;

typedef struct float64x1x3_t
{
  float64x1_t val[3];
} float64x1x3_t;

typedef struct poly8x8x3_t
{
  poly8x8_t val[3];
} poly8x8x3_t;

typedef struct poly8x16x3_t
{
  poly8x16_t val[3];
} poly8x16x3_t;

typedef struct poly16x4x3_t
{
  poly16x4_t val[3];
} poly16x4x3_t;

typedef struct poly16x8x3_t
{
  poly16x8_t val[3];
} poly16x8x3_t;

typedef struct int8x8x4_t
{
  int8x8_t val[4];
} int8x8x4_t;

typedef struct int8x16x4_t
{
  int8x16_t val[4];
} int8x16x4_t;

typedef struct int16x4x4_t
{
  int16x4_t val[4];
} int16x4x4_t;

typedef struct int16x8x4_t
{
  int16x8_t val[4];
} int16x8x4_t;

typedef struct int32x2x4_t
{
  int32x2_t val[4];
} int32x2x4_t;

typedef struct int32x4x4_t
{
  int32x4_t val[4];
} int32x4x4_t;

typedef struct int64x1x4_t
{
  int64x1_t val[4];
} int64x1x4_t;

typedef struct int64x2x4_t
{
  int64x2_t val[4];
} int64x2x4_t;

typedef struct uint8x8x4_t
{
  uint8x8_t val[4];
} uint8x8x4_t;

typedef struct uint8x16x4_t
{
  uint8x16_t val[4];
} uint8x16x4_t;

typedef struct uint16x4x4_t
{
  uint16x4_t val[4];
} uint16x4x4_t;

typedef struct uint16x8x4_t
{
  uint16x8_t val[4];
} uint16x8x4_t;

typedef struct uint32x2x4_t
{
  uint32x2_t val[4];
} uint32x2x4_t;

typedef struct uint32x4x4_t
{
  uint32x4_t val[4];
} uint32x4x4_t;

typedef struct uint64x1x4_t
{
  uint64x1_t val[4];
} uint64x1x4_t;

typedef struct uint64x2x4_t
{
  uint64x2_t val[4];
} uint64x2x4_t;

typedef struct float16x4x4_t
{
  float16x4_t val[4];
} float16x4x4_t;

typedef struct float16x8x4_t
{
  float16x8_t val[4];
} float16x8x4_t;

typedef struct float32x2x4_t
{
  float32x2_t val[4];
} float32x2x4_t;

typedef struct float32x4x4_t
{
  float32x4_t val[4];
} float32x4x4_t;

typedef struct float64x2x4_t
{
  float64x2_t val[4];
} float64x2x4_t;

typedef struct float64x1x4_t
{
  float64x1_t val[4];
} float64x1x4_t;

typedef struct poly8x8x4_t
{
  poly8x8_t val[4];
} poly8x8x4_t;

typedef struct poly8x16x4_t
{
  poly8x16_t val[4];
} poly8x16x4_t;

typedef struct poly16x4x4_t
{
  poly16x4_t val[4];
} poly16x4x4_t;

typedef struct poly16x8x4_t
{
  poly16x8_t val[4];
} poly16x8x4_t;

/* __aarch64_vdup_lane internal macros.  */
#define __aarch64_vdup_lane_any(__size, __q, __a, __b) \
  vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b))
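
/* Illustrative note (an editorial sketch, not part of the original
   header): given a hypothetical call
   __aarch64_vdup_lane_any (f32, q, __a, 1), token pasting expands to
     vdupq_n_f32 (__aarch64_vget_lane_any (__a, 1)),
   i.e. the selected lane is read out as a scalar and then broadcast by
   the matching vdup*_n_* intrinsic.  */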

#define __aarch64_vdup_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_lane_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , __a, __b)

/* __aarch64_vdup_laneq internal macros.  */
#define __aarch64_vdup_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , __a, __b)

/* __aarch64_vdupq_lane internal macros.  */
#define __aarch64_vdupq_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, __a, __b)
#define __aarch64_vdupq_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, __a, __b)
#define __aarch64_vdupq_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, __a, __b)

/* __aarch64_vdupq_laneq internal macros.  */
#define __aarch64_vdupq_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, __a, __b)

/* Internal macro for lane indices.  */

#define __AARCH64_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0]))
#define __AARCH64_LANE_CHECK(__vec, __idx)	\
	__builtin_aarch64_im_lane_boundsi (sizeof(__vec), sizeof(__vec[0]), __idx)

/* For big-endian, GCC's vector indices are the opposite way around
   to the architectural lane indices used by Neon intrinsics.  */
#ifdef __AARCH64EB__
#define __aarch64_lane(__vec, __idx) (__AARCH64_NUM_LANES (__vec) - 1 - __idx)
#else
#define __aarch64_lane(__vec, __idx) __idx
#endif
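
/* Editorial example (not in the original source): with a uint32x4_t __v,
   __AARCH64_NUM_LANES (__v) is 4, so on big-endian
   __aarch64_lane (__v, 0) evaluates to 3 -- architectural lane 0 lives
   at GCC vector index 3.  On little-endian the index passes through
   unchanged.  */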

/* vget_lane internal macro.  */
#define __aarch64_vget_lane_any(__vec, __index)				\
  __extension__								\
  ({									\
    __AARCH64_LANE_CHECK (__vec, __index);				\
    __vec[__aarch64_lane (__vec, __index)];				\
  })
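
/* Usage sketch (hypothetical, for illustration only):

     int32x2_t __v = vadd_s32 (__x, __y);
     int32_t __e = __aarch64_vget_lane_any (__v, 1);

   The statement expression validates __index at compile time via
   __AARCH64_LANE_CHECK, then remaps it with __aarch64_lane so the
   architectural lane is read regardless of endianness.  */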

/* vset_lane and vld1_lane internal macro.  */
#define __aarch64_vset_lane_any(__elem, __vec, __index)			\
  __extension__								\
  ({									\
    __AARCH64_LANE_CHECK (__vec, __index);				\
    __vec[__aarch64_lane (__vec, __index)] = __elem;			\
    __vec;								\
  })
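
/* Usage sketch (hypothetical): __aarch64_vset_lane_any (__e, __v, 1)
   stores the scalar __e into architectural lane 1 of __v and yields the
   updated vector; the vset_lane_* and vld1_lane_* intrinsics are built
   on this.  */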

/* vadd  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vadd_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vadd_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vadd_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a + __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vaddq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vaddq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a + __b;
}

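/* vaddl: long (widening) add.  Each element pair is added at twice the
   input element width, so the sum cannot wrap; e.g. adding two int8x8_t
   vectors yields an int16x8_t.  */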
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

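/* vaddw: wide add.  The narrow second operand is widened to the element
   width of the first operand before the addition.  */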
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
						    (int32x4_t) __b);
}

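/* vhadd: halving add.  Each result element is (__a + __b) >> 1, computed
   in a wider intermediate so the carry is not lost; the discarded low
   bit is truncated.  */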
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

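/* vrhadd: rounding halving add.  As vhadd, but computes
   (__a + __b + 1) >> 1 per element, rounding the halved sum upwards.  */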
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
						    (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
						    (int32x2_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

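/* vaddhn: add and narrow (high half).  The operands are added at full
   width and the most significant half of each element is returned, so
   two int16x8_t inputs produce an int8x8_t.  */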
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vaddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vaddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vaddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}

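/* vraddhn: rounding form of vaddhn; a rounding constant of
   1 << (N - 1), where N is the narrowed element width, is added to each
   sum before the high half is taken.  */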
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vraddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vraddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vraddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
						    (int64x2_t) __b);
}

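/* vaddhn_high / vraddhn_high: as above, but the narrowed results form
   the upper half of the returned vector, with __a supplying the lower
   half.  */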
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
						    (int16x8_t) __b,
						    (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
						    (int32x4_t) __b,
						    (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
						    (int64x2_t) __b,
						    (int64x2_t) __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
						     (int16x8_t) __b,
						     (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
						     (int32x4_t) __b,
						     (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
						     (int64x2_t) __b,
						     (int64x2_t) __c);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdiv_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a / __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdiv_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a / __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdivq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a / __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdivq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a / __b;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmul_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmul_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmul_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a * __b;
}

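/* vmul_p8 below is a polynomial (carry-less) multiply over GF(2); unlike
   the integer variants it cannot be expressed with the C `*' operator,
   hence the builtin.  */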
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmul_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmulq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a * __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vand_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vand_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vand_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vand_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vand_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vand_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vand_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vand_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vandq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vandq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vandq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vandq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vandq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vandq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vandq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vandq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vorr_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vorr_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vorr_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vorr_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vorr_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vorr_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vorr_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vorr_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vorrq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vorrq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vorrq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vorrq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
veor_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
veor_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
veor_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
veor_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
veor_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
veor_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
veor_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
veor_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
veorq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
veorq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
veorq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
veorq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
veorq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
veorq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
veorq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
veorq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a ^ __b;
}

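/* vbic: bit clear.  Each result element is __a & ~__b.  */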
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vbic_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vbic_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vbic_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vbic_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vbic_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vbic_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vbic_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vbic_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vbicq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vbicq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vbicq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vbicq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & ~__b;
}

1716 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vorn_s8(int8x8_t __a,int8x8_t __b)1717 vorn_s8 (int8x8_t __a, int8x8_t __b)
1718 {
1719   return __a | ~__b;
1720 }
1721 
1722 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vorn_s16(int16x4_t __a,int16x4_t __b)1723 vorn_s16 (int16x4_t __a, int16x4_t __b)
1724 {
1725   return __a | ~__b;
1726 }
1727 
1728 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vorn_s32(int32x2_t __a,int32x2_t __b)1729 vorn_s32 (int32x2_t __a, int32x2_t __b)
1730 {
1731   return __a | ~__b;
1732 }
1733 
1734 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vorn_u8(uint8x8_t __a,uint8x8_t __b)1735 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1736 {
1737   return __a | ~__b;
1738 }
1739 
1740 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vorn_u16(uint16x4_t __a,uint16x4_t __b)1741 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1742 {
1743   return __a | ~__b;
1744 }
1745 
1746 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vorn_u32(uint32x2_t __a,uint32x2_t __b)1747 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1748 {
1749   return __a | ~__b;
1750 }
1751 
1752 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vorn_s64(int64x1_t __a,int64x1_t __b)1753 vorn_s64 (int64x1_t __a, int64x1_t __b)
1754 {
1755   return __a | ~__b;
1756 }
1757 
1758 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vorn_u64(uint64x1_t __a,uint64x1_t __b)1759 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1760 {
1761   return __a | ~__b;
1762 }
1763 
1764 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vornq_s8(int8x16_t __a,int8x16_t __b)1765 vornq_s8 (int8x16_t __a, int8x16_t __b)
1766 {
1767   return __a | ~__b;
1768 }
1769 
1770 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vornq_s16(int16x8_t __a,int16x8_t __b)1771 vornq_s16 (int16x8_t __a, int16x8_t __b)
1772 {
1773   return __a | ~__b;
1774 }
1775 
1776 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vornq_s32(int32x4_t __a,int32x4_t __b)1777 vornq_s32 (int32x4_t __a, int32x4_t __b)
1778 {
1779   return __a | ~__b;
1780 }
1781 
1782 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vornq_s64(int64x2_t __a,int64x2_t __b)1783 vornq_s64 (int64x2_t __a, int64x2_t __b)
1784 {
1785   return __a | ~__b;
1786 }
1787 
1788 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vornq_u8(uint8x16_t __a,uint8x16_t __b)1789 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1790 {
1791   return __a | ~__b;
1792 }
1793 
1794 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vornq_u16(uint16x8_t __a,uint16x8_t __b)1795 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1796 {
1797   return __a | ~__b;
1798 }
1799 
1800 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vornq_u32(uint32x4_t __a,uint32x4_t __b)1801 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1802 {
1803   return __a | ~__b;
1804 }
1805 
1806 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vornq_u64(uint64x2_t __a,uint64x2_t __b)1807 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1808 {
1809   return __a | ~__b;
1810 }
1811 
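/* Illustrative sketch (not part of the header proper): vbic computes
   __a & ~__b per lane (the BIC instruction) and vorn computes __a | ~__b
   (ORN).  For example, clearing the low nibble of every byte:

     uint8x8_t __mask = vcreate_u8 (0x0f0f0f0f0f0f0f0f);
     uint8x8_t __high = vbic_u8 (__bytes, __mask);

   leaves __bytes & ~0x0f in each lane.  vcreate_u8 is defined later in
   this file; __bytes stands for any uint8x8_t value.  */
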
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vsub_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vsub_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a - __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsubq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vsubq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
						    (int32x4_t) __b);
}
2087 
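/* Illustrative sketch (not part of the header proper): the vsubl family
   sign- or zero-extends both operands to twice the element width and
   subtracts at that width; the vsubw family widens only the second
   operand.  The _high variants (SSUBL2, USUBL2, SSUBW2, USUBW2) read the
   upper half of their 128-bit inputs.  For example:

     int16x8_t __d = vsubl_s8 (vdup_n_s8 (-100), vdup_n_s8 (100));

   leaves -200 in every lane, a value an 8-bit subtraction could not
   represent.  vdup_n_s8 is assumed to be defined elsewhere in this
   file.  */
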
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqadd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vhsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_shsubv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vhsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_shsubv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vhsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_shsubv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vhsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uhsubv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vhsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uhsubv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vhsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uhsubv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vhsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_shsubv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vhsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_shsubv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vhsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_shsubv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vhsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uhsubv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vhsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uhsubv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vhsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uhsubv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsubhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_subhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsubhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_subhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsubhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_subhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_subhnv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_subhnv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_subhnv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrsubhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_rsubhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrsubhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_rsubhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrsubhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_rsubhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_rsubhnv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_rsubhnv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_rsubhnv2di ((int64x2_t) __a,
						    (int64x2_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_rsubhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_rsubhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_rsubhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_rsubhn2v8hi ((int8x8_t) __a,
						     (int16x8_t) __b,
						     (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_rsubhn2v4si ((int16x4_t) __a,
						     (int32x4_t) __b,
						     (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_rsubhn2v2di ((int32x2_t) __a,
						     (int64x2_t) __b,
						     (int64x2_t) __c);
}
2315 
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_subhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_subhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_subhn2v2di (__a, __b, __c);
}
2333 
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_subhn2v8hi ((int8x8_t) __a,
						    (int16x8_t) __b,
						    (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_subhn2v4si ((int16x4_t) __a,
						    (int32x4_t) __b,
						    (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_subhn2v2di ((int32x2_t) __a,
						    (int64x2_t) __b,
						    (int64x2_t) __c);
}
2357 
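/* Illustrative sketch (not part of the header proper): vsubhn returns the
   most significant half of each difference (SUBHN); vrsubhn adds a
   rounding constant of 1 << (result width - 1) before taking the high
   half (RSUBHN).  For 16-bit lanes narrowing to 8 bits:

     vsubhn_s16:  (int8_t) ((__a[i] - __b[i]) >> 8)
     vrsubhn_s16: (int8_t) ((__a[i] - __b[i] + 0x80) >> 8)

   The _high forms place the narrowed result in the upper half of a
   128-bit vector whose lower half comes from the first argument.  */
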
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqaddq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
}
2519 
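/* Illustrative sketch (not part of the header proper): vqadd and vqsub
   saturate instead of wrapping; on overflow the result is clamped to the
   type's range and the QC flag in the FPSR is set.  For example:

     int8x8_t __s = vqadd_s8 (vdup_n_s8 (100), vdup_n_s8 (100));

   leaves every lane at 127, where a plain addition would wrap to -56.
   vdup_n_s8 is assumed to be defined elsewhere in this file.  */
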
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqneg_s8 (int8x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqneg_s16 (int16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqneg_s32 (int32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqneg_s64 (int64x1_t __a)
{
  return (int64x1_t) {__builtin_aarch64_sqnegdi (__a[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqnegq_s8 (int8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqnegq_s16 (int16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqnegq_s32 (int32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqabs_s8 (int8x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqabs_s16 (int16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqabs_s32 (int32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqabs_s64 (int64x1_t __a)
{
  return (int64x1_t) {__builtin_aarch64_sqabsdi (__a[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqabsq_s8 (int8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqabsq_s16 (int16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqabsq_s32 (int32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
}
2651 
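/* Illustrative sketch (not part of the header proper): vqdmulh is the
   saturating doubling multiply returning the high half, and vqrdmulh adds
   a rounding constant before the shift.  For 16-bit lanes:

     vqdmulh_s16:  saturate ((2 * __a[i] * __b[i]) >> 16)
     vqrdmulh_s16: saturate ((2 * __a[i] * __b[i] + (1 << 15)) >> 16)

   The only case that saturates is INT16_MIN * INT16_MIN (respectively
   INT32_MIN * INT32_MIN for 32-bit lanes).  These are the usual
   primitives for Q15/Q31 fixed-point multiplication.  */
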
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcreate_s8 (uint64_t __a)
{
  return (int8x8_t) __a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcreate_s16 (uint64_t __a)
{
  return (int16x4_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcreate_s32 (uint64_t __a)
{
  return (int32x2_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vcreate_s64 (uint64_t __a)
{
  return (int64x1_t) {__a};
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vcreate_f16 (uint64_t __a)
{
  return (float16x4_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcreate_f32 (uint64_t __a)
{
  return (float32x2_t) __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcreate_u8 (uint64_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcreate_u16 (uint64_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcreate_u32 (uint64_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcreate_u64 (uint64_t __a)
{
  return (uint64x1_t) {__a};
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vcreate_f64 (uint64_t __a)
{
  return (float64x1_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcreate_p8 (uint64_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vcreate_p16 (uint64_t __a)
{
  return (poly16x4_t) __a;
}
2729 
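/* Illustrative sketch (not part of the header proper): vcreate
   reinterprets the bits of a 64-bit scalar as a 64-bit vector, with lane 0
   in the least significant bits; no value conversion is performed.  For
   example:

     uint8x8_t __v = vcreate_u8 (0x0706050403020100);

   gives lanes {0x00, 0x01, ..., 0x07}.  */
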
/* vget_lane  */

__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vget_lane_f16 (float16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vget_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vget_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vget_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vget_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vget_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vget_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vget_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vget_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vget_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vget_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vget_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vget_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}
2809 
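/* Illustrative sketch (not part of the header proper): the lane index
   passed to these accessors must be a constant expression in range for
   the vector type; __aarch64_vget_lane_any, assumed to be defined earlier
   in this file, performs the bounds check.  For example:

     float32x2_t __v = vcreate_f32 (0x3f80000040000000);
     float32_t __lo = vget_lane_f32 (__v, 0);
     float32_t __hi = vget_lane_f32 (__v, 1);

   leaves __lo == 2.0f (the low 32 bits) and __hi == 1.0f.  */
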
/* vgetq_lane  */

__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vgetq_lane_f16 (float16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vgetq_lane_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vgetq_lane_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vgetq_lane_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vgetq_lane_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vgetq_lane_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vgetq_lane_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vgetq_lane_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vgetq_lane_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vgetq_lane_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vgetq_lane_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vgetq_lane_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vgetq_lane_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

/* vreinterpret  */
2891 
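/* Illustrative sketch (not part of the header proper): every vreinterpret
   variant is a pure bit-pattern cast between vector types of the same
   total size (64 or 128 bits); no lanes are moved and no values are
   converted.  For example:

     uint32x2_t __u = vcreate_u32 (0x3f800000bf800000);
     float32x2_t __f = vreinterpret_f32_u32 (__u);

   makes __f contain {-1.0f, 1.0f}.  vreinterpret_f32_u32 follows the same
   pattern as the functions below and is assumed to appear later in this
   file.  */
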
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_f16 (float16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_f64 (float64x1_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s8 (int8x8_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s16 (int16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s32 (int32x2_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s64 (int64x1_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_f32 (float32x2_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_u8 (uint8x8_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_u16 (uint16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_u32 (uint32x2_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_u64 (uint64x1_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_p16 (poly16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_f64 (float64x2_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s8 (int8x16_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s16 (int16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s32 (int32x4_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s64 (int64x2_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_f16 (float16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_f32 (float32x4_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u8 (uint8x16_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u16 (uint16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u32 (uint32x4_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u64 (uint64x2_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_p16 (poly16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_f16 (float16x4_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_f64 (float64x1_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s8 (int8x8_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s16 (int16x4_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s32 (int32x2_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s64 (int64x1_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_f32 (float32x2_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u8 (uint8x8_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u16 (uint16x4_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u32 (uint32x2_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u64 (uint64x1_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_p8 (poly8x8_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_f64 (float64x2_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s8 (int8x16_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s16 (int16x8_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s32 (int32x4_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s64 (int64x2_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_f16 (float16x8_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_f32 (float32x4_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u8 (uint8x16_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u16 (uint16x8_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u32 (uint32x4_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u64 (uint64x2_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_p8 (poly8x16_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_f64 (float64x1_t __a)
{
  return (float16x4_t) __a;
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_s8 (int8x8_t __a)
{
  return (float16x4_t) __a;
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_s16 (int16x4_t __a)
{
  return (float16x4_t) __a;
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_s32 (int32x2_t __a)
{
  return (float16x4_t) __a;
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_s64 (int64x1_t __a)
{
  return (float16x4_t) __a;
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_f32 (float32x2_t __a)
{
  return (float16x4_t) __a;
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_u8 (uint8x8_t __a)
{
  return (float16x4_t) __a;
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_u16 (uint16x4_t __a)
{
  return (float16x4_t) __a;
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_u32 (uint32x2_t __a)
{
  return (float16x4_t) __a;
}
3233 
3234 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_u64(uint64x1_t __a)3235 vreinterpret_f16_u64 (uint64x1_t __a)
3236 {
3237   return (float16x4_t) __a;
3238 }
3239 
3240 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_p8(poly8x8_t __a)3241 vreinterpret_f16_p8 (poly8x8_t __a)
3242 {
3243   return (float16x4_t) __a;
3244 }
3245 
3246 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_p16(poly16x4_t __a)3247 vreinterpret_f16_p16 (poly16x4_t __a)
3248 {
3249   return (float16x4_t) __a;
3250 }
3251 
3252 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_f64(float64x2_t __a)3253 vreinterpretq_f16_f64 (float64x2_t __a)
3254 {
3255   return (float16x8_t) __a;
3256 }
3257 
3258 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_s8(int8x16_t __a)3259 vreinterpretq_f16_s8 (int8x16_t __a)
3260 {
3261   return (float16x8_t) __a;
3262 }
3263 
3264 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_s16(int16x8_t __a)3265 vreinterpretq_f16_s16 (int16x8_t __a)
3266 {
3267   return (float16x8_t) __a;
3268 }
3269 
3270 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_s32(int32x4_t __a)3271 vreinterpretq_f16_s32 (int32x4_t __a)
3272 {
3273   return (float16x8_t) __a;
3274 }
3275 
3276 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_s64(int64x2_t __a)3277 vreinterpretq_f16_s64 (int64x2_t __a)
3278 {
3279   return (float16x8_t) __a;
3280 }
3281 
3282 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_f32(float32x4_t __a)3283 vreinterpretq_f16_f32 (float32x4_t __a)
3284 {
3285   return (float16x8_t) __a;
3286 }
3287 
3288 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_u8(uint8x16_t __a)3289 vreinterpretq_f16_u8 (uint8x16_t __a)
3290 {
3291   return (float16x8_t) __a;
3292 }
3293 
3294 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_u16(uint16x8_t __a)3295 vreinterpretq_f16_u16 (uint16x8_t __a)
3296 {
3297   return (float16x8_t) __a;
3298 }
3299 
3300 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_u32(uint32x4_t __a)3301 vreinterpretq_f16_u32 (uint32x4_t __a)
3302 {
3303   return (float16x8_t) __a;
3304 }
3305 
3306 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_u64(uint64x2_t __a)3307 vreinterpretq_f16_u64 (uint64x2_t __a)
3308 {
3309   return (float16x8_t) __a;
3310 }
3311 
3312 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_p8(poly8x16_t __a)3313 vreinterpretq_f16_p8 (poly8x16_t __a)
3314 {
3315   return (float16x8_t) __a;
3316 }
3317 
3318 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_p16(poly16x8_t __a)3319 vreinterpretq_f16_p16 (poly16x8_t __a)
3320 {
3321   return (float16x8_t) __a;
3322 }
3323 
3324 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_f16(float16x4_t __a)3325 vreinterpret_f32_f16 (float16x4_t __a)
3326 {
3327   return (float32x2_t) __a;
3328 }
3329 
3330 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_f64(float64x1_t __a)3331 vreinterpret_f32_f64 (float64x1_t __a)
3332 {
3333   return (float32x2_t) __a;
3334 }
3335 
3336 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s8(int8x8_t __a)3337 vreinterpret_f32_s8 (int8x8_t __a)
3338 {
3339   return (float32x2_t) __a;
3340 }
3341 
3342 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s16(int16x4_t __a)3343 vreinterpret_f32_s16 (int16x4_t __a)
3344 {
3345   return (float32x2_t) __a;
3346 }
3347 
3348 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s32(int32x2_t __a)3349 vreinterpret_f32_s32 (int32x2_t __a)
3350 {
3351   return (float32x2_t) __a;
3352 }
3353 
3354 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s64(int64x1_t __a)3355 vreinterpret_f32_s64 (int64x1_t __a)
3356 {
3357   return (float32x2_t) __a;
3358 }
3359 
3360 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u8(uint8x8_t __a)3361 vreinterpret_f32_u8 (uint8x8_t __a)
3362 {
3363   return (float32x2_t) __a;
3364 }
3365 
3366 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u16(uint16x4_t __a)3367 vreinterpret_f32_u16 (uint16x4_t __a)
3368 {
3369   return (float32x2_t) __a;
3370 }
3371 
3372 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u32(uint32x2_t __a)3373 vreinterpret_f32_u32 (uint32x2_t __a)
3374 {
3375   return (float32x2_t) __a;
3376 }
3377 
3378 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u64(uint64x1_t __a)3379 vreinterpret_f32_u64 (uint64x1_t __a)
3380 {
3381   return (float32x2_t) __a;
3382 }
3383 
3384 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_p8(poly8x8_t __a)3385 vreinterpret_f32_p8 (poly8x8_t __a)
3386 {
3387   return (float32x2_t) __a;
3388 }
3389 
3390 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_p16(poly16x4_t __a)3391 vreinterpret_f32_p16 (poly16x4_t __a)
3392 {
3393   return (float32x2_t) __a;
3394 }
3395 
3396 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_f16(float16x8_t __a)3397 vreinterpretq_f32_f16 (float16x8_t __a)
3398 {
3399   return (float32x4_t) __a;
3400 }
3401 
3402 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_f64(float64x2_t __a)3403 vreinterpretq_f32_f64 (float64x2_t __a)
3404 {
3405   return (float32x4_t) __a;
3406 }
3407 
3408 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s8(int8x16_t __a)3409 vreinterpretq_f32_s8 (int8x16_t __a)
3410 {
3411   return (float32x4_t) __a;
3412 }
3413 
3414 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s16(int16x8_t __a)3415 vreinterpretq_f32_s16 (int16x8_t __a)
3416 {
3417   return (float32x4_t) __a;
3418 }
3419 
3420 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s32(int32x4_t __a)3421 vreinterpretq_f32_s32 (int32x4_t __a)
3422 {
3423   return (float32x4_t) __a;
3424 }
3425 
3426 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s64(int64x2_t __a)3427 vreinterpretq_f32_s64 (int64x2_t __a)
3428 {
3429   return (float32x4_t) __a;
3430 }
3431 
3432 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u8(uint8x16_t __a)3433 vreinterpretq_f32_u8 (uint8x16_t __a)
3434 {
3435   return (float32x4_t) __a;
3436 }
3437 
3438 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u16(uint16x8_t __a)3439 vreinterpretq_f32_u16 (uint16x8_t __a)
3440 {
3441   return (float32x4_t) __a;
3442 }
3443 
3444 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u32(uint32x4_t __a)3445 vreinterpretq_f32_u32 (uint32x4_t __a)
3446 {
3447   return (float32x4_t) __a;
3448 }
3449 
3450 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u64(uint64x2_t __a)3451 vreinterpretq_f32_u64 (uint64x2_t __a)
3452 {
3453   return (float32x4_t) __a;
3454 }
3455 
3456 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_p8(poly8x16_t __a)3457 vreinterpretq_f32_p8 (poly8x16_t __a)
3458 {
3459   return (float32x4_t) __a;
3460 }
3461 
3462 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_p16(poly16x8_t __a)3463 vreinterpretq_f32_p16 (poly16x8_t __a)
3464 {
3465   return (float32x4_t) __a;
3466 }
3467 
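/* A note on semantics: every vreinterpret intrinsic in this file is a
   pure bit-pattern cast between vector types of the same total width
   (64 bits for the plain forms, 128 bits for the "q" forms); no lane
   values change and the compiler normally emits no instruction for it.
   A minimal illustrative sketch, not part of the original header:

     uint32x4_t __bits = vdupq_n_u32 (0x3f800000);
     float32x4_t __ones = vreinterpretq_f32_u32 (__bits);

   yields four lanes of 1.0f, since 0x3f800000 is the IEEE-754
   single-precision encoding of 1.0.  */
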
__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_f16 (float16x4_t __a)
{
  return (float64x1_t) __a;
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_f32 (float32x2_t __a)
{
  return (float64x1_t) __a;
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_p8 (poly8x8_t __a)
{
  return (float64x1_t) __a;
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_p16 (poly16x4_t __a)
{
  return (float64x1_t) __a;
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_s8 (int8x8_t __a)
{
  return (float64x1_t) __a;
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_s16 (int16x4_t __a)
{
  return (float64x1_t) __a;
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_s32 (int32x2_t __a)
{
  return (float64x1_t) __a;
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_s64 (int64x1_t __a)
{
  return (float64x1_t) __a;
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_u8 (uint8x8_t __a)
{
  return (float64x1_t) __a;
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_u16 (uint16x4_t __a)
{
  return (float64x1_t) __a;
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_u32 (uint32x2_t __a)
{
  return (float64x1_t) __a;
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_u64 (uint64x1_t __a)
{
  return (float64x1_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_f16 (float16x8_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_f32 (float32x4_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_p8 (poly8x16_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_p16 (poly16x8_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_s8 (int8x16_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_s16 (int16x8_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_s32 (int32x4_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_s64 (int64x2_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_u8 (uint8x16_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_u16 (uint16x8_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_u32 (uint32x4_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_u64 (uint64x2_t __a)
{
  return (float64x2_t) __a;
}

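/* The floating-point reinterprets above reuse the source bits as an
   IEEE-754 encoding; they never convert lane values numerically.
   Numeric conversion is the job of the vcvt family defined elsewhere
   in this file.  An illustrative contrast, not part of the original
   header:

     int32x2_t __i = vdup_n_s32 (1);
     float32x2_t __c = vcvt_f32_s32 (__i);
     float32x2_t __r = vreinterpret_f32_s32 (__i);

   Here every lane of __c is 1.0f, while every lane of __r keeps the
   raw bit pattern 0x00000001, which reads as a denormal float.  */
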
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_f16 (float16x4_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_f64 (float64x1_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_s8 (int8x8_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_s16 (int16x4_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_s32 (int32x2_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_f32 (float32x2_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u8 (uint8x8_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u16 (uint16x4_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u32 (uint32x2_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u64 (uint64x1_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_p8 (poly8x8_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_p16 (poly16x4_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_f64 (float64x2_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s8 (int8x16_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s16 (int16x8_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s32 (int32x4_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_f16 (float16x8_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_f32 (float32x4_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u8 (uint8x16_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u16 (uint16x8_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u32 (uint32x4_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u64 (uint64x2_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p8 (poly8x16_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p16 (poly16x8_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_f16 (float16x4_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_f64 (float64x1_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s8 (int8x8_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s16 (int16x4_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s32 (int32x2_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s64 (int64x1_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_f32 (float32x2_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u8 (uint8x8_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u16 (uint16x4_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u32 (uint32x2_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_p8 (poly8x8_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_p16 (poly16x4_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_f64 (float64x2_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s8 (int8x16_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s16 (int16x8_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s32 (int32x4_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s64 (int64x2_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_f16 (float16x8_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_f32 (float32x4_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u8 (uint8x16_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u16 (uint16x8_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u32 (uint32x4_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p8 (poly8x16_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p16 (poly16x8_t __a)
{
  return (uint64x2_t) __a;
}

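/* Reinterpreting between integer vectors of equal width likewise
   leaves the raw bits untouched; only the reading of each lane
   changes.  An illustrative sketch, not part of the original header:

     uint8x8_t __u = vdup_n_u8 (0xff);
     int8x8_t __s = vreinterpret_s8_u8 (__u);

   gives eight lanes of -1, the two's-complement reading of 0xff.  */
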
3900 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_f16(float16x4_t __a)3901 vreinterpret_s8_f16 (float16x4_t __a)
3902 {
3903   return (int8x8_t) __a;
3904 }
3905 
3906 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_f64(float64x1_t __a)3907 vreinterpret_s8_f64 (float64x1_t __a)
3908 {
3909   return (int8x8_t) __a;
3910 }
3911 
3912 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s16(int16x4_t __a)3913 vreinterpret_s8_s16 (int16x4_t __a)
3914 {
3915   return (int8x8_t) __a;
3916 }
3917 
3918 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s32(int32x2_t __a)3919 vreinterpret_s8_s32 (int32x2_t __a)
3920 {
3921   return (int8x8_t) __a;
3922 }
3923 
3924 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s64(int64x1_t __a)3925 vreinterpret_s8_s64 (int64x1_t __a)
3926 {
3927   return (int8x8_t) __a;
3928 }
3929 
3930 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_f32(float32x2_t __a)3931 vreinterpret_s8_f32 (float32x2_t __a)
3932 {
3933   return (int8x8_t) __a;
3934 }
3935 
3936 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u8(uint8x8_t __a)3937 vreinterpret_s8_u8 (uint8x8_t __a)
3938 {
3939   return (int8x8_t) __a;
3940 }
3941 
3942 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u16(uint16x4_t __a)3943 vreinterpret_s8_u16 (uint16x4_t __a)
3944 {
3945   return (int8x8_t) __a;
3946 }
3947 
3948 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u32(uint32x2_t __a)3949 vreinterpret_s8_u32 (uint32x2_t __a)
3950 {
3951   return (int8x8_t) __a;
3952 }
3953 
3954 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u64(uint64x1_t __a)3955 vreinterpret_s8_u64 (uint64x1_t __a)
3956 {
3957   return (int8x8_t) __a;
3958 }
3959 
3960 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_p8(poly8x8_t __a)3961 vreinterpret_s8_p8 (poly8x8_t __a)
3962 {
3963   return (int8x8_t) __a;
3964 }
3965 
3966 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_p16(poly16x4_t __a)3967 vreinterpret_s8_p16 (poly16x4_t __a)
3968 {
3969   return (int8x8_t) __a;
3970 }
3971 
3972 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_f64(float64x2_t __a)3973 vreinterpretq_s8_f64 (float64x2_t __a)
3974 {
3975   return (int8x16_t) __a;
3976 }
3977 
3978 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s16(int16x8_t __a)3979 vreinterpretq_s8_s16 (int16x8_t __a)
3980 {
3981   return (int8x16_t) __a;
3982 }
3983 
3984 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s32(int32x4_t __a)3985 vreinterpretq_s8_s32 (int32x4_t __a)
3986 {
3987   return (int8x16_t) __a;
3988 }
3989 
3990 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s64(int64x2_t __a)3991 vreinterpretq_s8_s64 (int64x2_t __a)
3992 {
3993   return (int8x16_t) __a;
3994 }
3995 
3996 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_f16(float16x8_t __a)3997 vreinterpretq_s8_f16 (float16x8_t __a)
3998 {
3999   return (int8x16_t) __a;
4000 }
4001 
4002 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_f32(float32x4_t __a)4003 vreinterpretq_s8_f32 (float32x4_t __a)
4004 {
4005   return (int8x16_t) __a;
4006 }
4007 
4008 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u8(uint8x16_t __a)4009 vreinterpretq_s8_u8 (uint8x16_t __a)
4010 {
4011   return (int8x16_t) __a;
4012 }
4013 
4014 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u16(uint16x8_t __a)4015 vreinterpretq_s8_u16 (uint16x8_t __a)
4016 {
4017   return (int8x16_t) __a;
4018 }
4019 
4020 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u32(uint32x4_t __a)4021 vreinterpretq_s8_u32 (uint32x4_t __a)
4022 {
4023   return (int8x16_t) __a;
4024 }
4025 
4026 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u64(uint64x2_t __a)4027 vreinterpretq_s8_u64 (uint64x2_t __a)
4028 {
4029   return (int8x16_t) __a;
4030 }
4031 
4032 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_p8(poly8x16_t __a)4033 vreinterpretq_s8_p8 (poly8x16_t __a)
4034 {
4035   return (int8x16_t) __a;
4036 }
4037 
4038 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_p16(poly16x8_t __a)4039 vreinterpretq_s8_p16 (poly16x8_t __a)
4040 {
4041   return (int8x16_t) __a;
4042 }
4043 
4044 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_f16(float16x4_t __a)4045 vreinterpret_s16_f16 (float16x4_t __a)
4046 {
4047   return (int16x4_t) __a;
4048 }
4049 
4050 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_f64(float64x1_t __a)4051 vreinterpret_s16_f64 (float64x1_t __a)
4052 {
4053   return (int16x4_t) __a;
4054 }
4055 
4056 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s8(int8x8_t __a)4057 vreinterpret_s16_s8 (int8x8_t __a)
4058 {
4059   return (int16x4_t) __a;
4060 }
4061 
4062 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s32(int32x2_t __a)4063 vreinterpret_s16_s32 (int32x2_t __a)
4064 {
4065   return (int16x4_t) __a;
4066 }
4067 
4068 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s64(int64x1_t __a)4069 vreinterpret_s16_s64 (int64x1_t __a)
4070 {
4071   return (int16x4_t) __a;
4072 }
4073 
4074 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_f32(float32x2_t __a)4075 vreinterpret_s16_f32 (float32x2_t __a)
4076 {
4077   return (int16x4_t) __a;
4078 }
4079 
4080 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u8(uint8x8_t __a)4081 vreinterpret_s16_u8 (uint8x8_t __a)
4082 {
4083   return (int16x4_t) __a;
4084 }
4085 
4086 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u16(uint16x4_t __a)4087 vreinterpret_s16_u16 (uint16x4_t __a)
4088 {
4089   return (int16x4_t) __a;
4090 }
4091 
4092 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u32(uint32x2_t __a)4093 vreinterpret_s16_u32 (uint32x2_t __a)
4094 {
4095   return (int16x4_t) __a;
4096 }
4097 
4098 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u64(uint64x1_t __a)4099 vreinterpret_s16_u64 (uint64x1_t __a)
4100 {
4101   return (int16x4_t) __a;
4102 }
4103 
4104 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_p8(poly8x8_t __a)4105 vreinterpret_s16_p8 (poly8x8_t __a)
4106 {
4107   return (int16x4_t) __a;
4108 }
4109 
4110 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_p16(poly16x4_t __a)4111 vreinterpret_s16_p16 (poly16x4_t __a)
4112 {
4113   return (int16x4_t) __a;
4114 }
4115 
4116 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_f64(float64x2_t __a)4117 vreinterpretq_s16_f64 (float64x2_t __a)
4118 {
4119   return (int16x8_t) __a;
4120 }
4121 
4122 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s8(int8x16_t __a)4123 vreinterpretq_s16_s8 (int8x16_t __a)
4124 {
4125   return (int16x8_t) __a;
4126 }
4127 
4128 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s32(int32x4_t __a)4129 vreinterpretq_s16_s32 (int32x4_t __a)
4130 {
4131   return (int16x8_t) __a;
4132 }
4133 
4134 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s64(int64x2_t __a)4135 vreinterpretq_s16_s64 (int64x2_t __a)
4136 {
4137   return (int16x8_t) __a;
4138 }
4139 
4140 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_f16(float16x8_t __a)4141 vreinterpretq_s16_f16 (float16x8_t __a)
4142 {
4143   return (int16x8_t) __a;
4144 }
4145 
4146 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_f32(float32x4_t __a)4147 vreinterpretq_s16_f32 (float32x4_t __a)
4148 {
4149   return (int16x8_t) __a;
4150 }
4151 
4152 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u8(uint8x16_t __a)4153 vreinterpretq_s16_u8 (uint8x16_t __a)
4154 {
4155   return (int16x8_t) __a;
4156 }
4157 
4158 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u16(uint16x8_t __a)4159 vreinterpretq_s16_u16 (uint16x8_t __a)
4160 {
4161   return (int16x8_t) __a;
4162 }
4163 
4164 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u32(uint32x4_t __a)4165 vreinterpretq_s16_u32 (uint32x4_t __a)
4166 {
4167   return (int16x8_t) __a;
4168 }
4169 
4170 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u64(uint64x2_t __a)4171 vreinterpretq_s16_u64 (uint64x2_t __a)
4172 {
4173   return (int16x8_t) __a;
4174 }
4175 
4176 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_p8(poly8x16_t __a)4177 vreinterpretq_s16_p8 (poly8x16_t __a)
4178 {
4179   return (int16x8_t) __a;
4180 }
4181 
4182 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_p16(poly16x8_t __a)4183 vreinterpretq_s16_p16 (poly16x8_t __a)
4184 {
4185   return (int16x8_t) __a;
4186 }
4187 
4188 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_f16(float16x4_t __a)4189 vreinterpret_s32_f16 (float16x4_t __a)
4190 {
4191   return (int32x2_t) __a;
4192 }
4193 
4194 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_f64(float64x1_t __a)4195 vreinterpret_s32_f64 (float64x1_t __a)
4196 {
4197   return (int32x2_t) __a;
4198 }
4199 
4200 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s8(int8x8_t __a)4201 vreinterpret_s32_s8 (int8x8_t __a)
4202 {
4203   return (int32x2_t) __a;
4204 }
4205 
4206 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s16(int16x4_t __a)4207 vreinterpret_s32_s16 (int16x4_t __a)
4208 {
4209   return (int32x2_t) __a;
4210 }
4211 
4212 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s64(int64x1_t __a)4213 vreinterpret_s32_s64 (int64x1_t __a)
4214 {
4215   return (int32x2_t) __a;
4216 }
4217 
4218 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_f32(float32x2_t __a)4219 vreinterpret_s32_f32 (float32x2_t __a)
4220 {
4221   return (int32x2_t) __a;
4222 }
4223 
4224 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u8(uint8x8_t __a)4225 vreinterpret_s32_u8 (uint8x8_t __a)
4226 {
4227   return (int32x2_t) __a;
4228 }
4229 
4230 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u16(uint16x4_t __a)4231 vreinterpret_s32_u16 (uint16x4_t __a)
4232 {
4233   return (int32x2_t) __a;
4234 }
4235 
4236 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u32(uint32x2_t __a)4237 vreinterpret_s32_u32 (uint32x2_t __a)
4238 {
4239   return (int32x2_t) __a;
4240 }
4241 
4242 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u64(uint64x1_t __a)4243 vreinterpret_s32_u64 (uint64x1_t __a)
4244 {
4245   return (int32x2_t) __a;
4246 }
4247 
4248 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_p8(poly8x8_t __a)4249 vreinterpret_s32_p8 (poly8x8_t __a)
4250 {
4251   return (int32x2_t) __a;
4252 }
4253 
4254 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_p16(poly16x4_t __a)4255 vreinterpret_s32_p16 (poly16x4_t __a)
4256 {
4257   return (int32x2_t) __a;
4258 }
4259 
4260 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_f64(float64x2_t __a)4261 vreinterpretq_s32_f64 (float64x2_t __a)
4262 {
4263   return (int32x4_t) __a;
4264 }
4265 
4266 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s8(int8x16_t __a)4267 vreinterpretq_s32_s8 (int8x16_t __a)
4268 {
4269   return (int32x4_t) __a;
4270 }
4271 
4272 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s16(int16x8_t __a)4273 vreinterpretq_s32_s16 (int16x8_t __a)
4274 {
4275   return (int32x4_t) __a;
4276 }
4277 
4278 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s64(int64x2_t __a)4279 vreinterpretq_s32_s64 (int64x2_t __a)
4280 {
4281   return (int32x4_t) __a;
4282 }
4283 
4284 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_f16(float16x8_t __a)4285 vreinterpretq_s32_f16 (float16x8_t __a)
4286 {
4287   return (int32x4_t) __a;
4288 }
4289 
4290 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_f32(float32x4_t __a)4291 vreinterpretq_s32_f32 (float32x4_t __a)
4292 {
4293   return (int32x4_t) __a;
4294 }
4295 
4296 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u8(uint8x16_t __a)4297 vreinterpretq_s32_u8 (uint8x16_t __a)
4298 {
4299   return (int32x4_t) __a;
4300 }
4301 
4302 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u16(uint16x8_t __a)4303 vreinterpretq_s32_u16 (uint16x8_t __a)
4304 {
4305   return (int32x4_t) __a;
4306 }
4307 
4308 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u32(uint32x4_t __a)4309 vreinterpretq_s32_u32 (uint32x4_t __a)
4310 {
4311   return (int32x4_t) __a;
4312 }
4313 
4314 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u64(uint64x2_t __a)4315 vreinterpretq_s32_u64 (uint64x2_t __a)
4316 {
4317   return (int32x4_t) __a;
4318 }
4319 
4320 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_p8(poly8x16_t __a)4321 vreinterpretq_s32_p8 (poly8x16_t __a)
4322 {
4323   return (int32x4_t) __a;
4324 }
4325 
4326 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_p16(poly16x8_t __a)4327 vreinterpretq_s32_p16 (poly16x8_t __a)
4328 {
4329   return (int32x4_t) __a;
4330 }
4331 
4332 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_f16(float16x4_t __a)4333 vreinterpret_u8_f16 (float16x4_t __a)
4334 {
4335   return (uint8x8_t) __a;
4336 }
4337 
4338 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_f64(float64x1_t __a)4339 vreinterpret_u8_f64 (float64x1_t __a)
4340 {
4341   return (uint8x8_t) __a;
4342 }
4343 
4344 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s8(int8x8_t __a)4345 vreinterpret_u8_s8 (int8x8_t __a)
4346 {
4347   return (uint8x8_t) __a;
4348 }
4349 
4350 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s16(int16x4_t __a)4351 vreinterpret_u8_s16 (int16x4_t __a)
4352 {
4353   return (uint8x8_t) __a;
4354 }
4355 
4356 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s32(int32x2_t __a)4357 vreinterpret_u8_s32 (int32x2_t __a)
4358 {
4359   return (uint8x8_t) __a;
4360 }
4361 
4362 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s64(int64x1_t __a)4363 vreinterpret_u8_s64 (int64x1_t __a)
4364 {
4365   return (uint8x8_t) __a;
4366 }
4367 
4368 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_f32(float32x2_t __a)4369 vreinterpret_u8_f32 (float32x2_t __a)
4370 {
4371   return (uint8x8_t) __a;
4372 }
4373 
4374 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u16(uint16x4_t __a)4375 vreinterpret_u8_u16 (uint16x4_t __a)
4376 {
4377   return (uint8x8_t) __a;
4378 }
4379 
4380 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u32(uint32x2_t __a)4381 vreinterpret_u8_u32 (uint32x2_t __a)
4382 {
4383   return (uint8x8_t) __a;
4384 }
4385 
4386 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u64(uint64x1_t __a)4387 vreinterpret_u8_u64 (uint64x1_t __a)
4388 {
4389   return (uint8x8_t) __a;
4390 }
4391 
4392 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_p8(poly8x8_t __a)4393 vreinterpret_u8_p8 (poly8x8_t __a)
4394 {
4395   return (uint8x8_t) __a;
4396 }
4397 
4398 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_p16(poly16x4_t __a)4399 vreinterpret_u8_p16 (poly16x4_t __a)
4400 {
4401   return (uint8x8_t) __a;
4402 }
4403 
4404 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_f64(float64x2_t __a)4405 vreinterpretq_u8_f64 (float64x2_t __a)
4406 {
4407   return (uint8x16_t) __a;
4408 }
4409 
4410 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s8(int8x16_t __a)4411 vreinterpretq_u8_s8 (int8x16_t __a)
4412 {
4413   return (uint8x16_t) __a;
4414 }
4415 
4416 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s16(int16x8_t __a)4417 vreinterpretq_u8_s16 (int16x8_t __a)
4418 {
4419   return (uint8x16_t) __a;
4420 }
4421 
4422 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s32(int32x4_t __a)4423 vreinterpretq_u8_s32 (int32x4_t __a)
4424 {
4425   return (uint8x16_t) __a;
4426 }
4427 
4428 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s64(int64x2_t __a)4429 vreinterpretq_u8_s64 (int64x2_t __a)
4430 {
4431   return (uint8x16_t) __a;
4432 }
4433 
4434 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_f16(float16x8_t __a)4435 vreinterpretq_u8_f16 (float16x8_t __a)
4436 {
4437   return (uint8x16_t) __a;
4438 }
4439 
4440 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_f32(float32x4_t __a)4441 vreinterpretq_u8_f32 (float32x4_t __a)
4442 {
4443   return (uint8x16_t) __a;
4444 }
4445 
4446 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u16(uint16x8_t __a)4447 vreinterpretq_u8_u16 (uint16x8_t __a)
4448 {
4449   return (uint8x16_t) __a;
4450 }
4451 
4452 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u32(uint32x4_t __a)4453 vreinterpretq_u8_u32 (uint32x4_t __a)
4454 {
4455   return (uint8x16_t) __a;
4456 }
4457 
4458 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u64(uint64x2_t __a)4459 vreinterpretq_u8_u64 (uint64x2_t __a)
4460 {
4461   return (uint8x16_t) __a;
4462 }
4463 
4464 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_p8(poly8x16_t __a)4465 vreinterpretq_u8_p8 (poly8x16_t __a)
4466 {
4467   return (uint8x16_t) __a;
4468 }
4469 
4470 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_p16(poly16x8_t __a)4471 vreinterpretq_u8_p16 (poly16x8_t __a)
4472 {
4473   return (uint8x16_t) __a;
4474 }
4475 
4476 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_f16(float16x4_t __a)4477 vreinterpret_u16_f16 (float16x4_t __a)
4478 {
4479   return (uint16x4_t) __a;
4480 }
4481 
4482 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_f64(float64x1_t __a)4483 vreinterpret_u16_f64 (float64x1_t __a)
4484 {
4485   return (uint16x4_t) __a;
4486 }
4487 
4488 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s8(int8x8_t __a)4489 vreinterpret_u16_s8 (int8x8_t __a)
4490 {
4491   return (uint16x4_t) __a;
4492 }
4493 
4494 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s16(int16x4_t __a)4495 vreinterpret_u16_s16 (int16x4_t __a)
4496 {
4497   return (uint16x4_t) __a;
4498 }
4499 
4500 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s32(int32x2_t __a)4501 vreinterpret_u16_s32 (int32x2_t __a)
4502 {
4503   return (uint16x4_t) __a;
4504 }
4505 
4506 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s64(int64x1_t __a)4507 vreinterpret_u16_s64 (int64x1_t __a)
4508 {
4509   return (uint16x4_t) __a;
4510 }
4511 
4512 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_f32(float32x2_t __a)4513 vreinterpret_u16_f32 (float32x2_t __a)
4514 {
4515   return (uint16x4_t) __a;
4516 }
4517 
4518 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u8(uint8x8_t __a)4519 vreinterpret_u16_u8 (uint8x8_t __a)
4520 {
4521   return (uint16x4_t) __a;
4522 }
4523 
4524 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u32(uint32x2_t __a)4525 vreinterpret_u16_u32 (uint32x2_t __a)
4526 {
4527   return (uint16x4_t) __a;
4528 }
4529 
4530 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u64(uint64x1_t __a)4531 vreinterpret_u16_u64 (uint64x1_t __a)
4532 {
4533   return (uint16x4_t) __a;
4534 }
4535 
4536 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_p8(poly8x8_t __a)4537 vreinterpret_u16_p8 (poly8x8_t __a)
4538 {
4539   return (uint16x4_t) __a;
4540 }
4541 
4542 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_p16(poly16x4_t __a)4543 vreinterpret_u16_p16 (poly16x4_t __a)
4544 {
4545   return (uint16x4_t) __a;
4546 }
4547 
4548 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_f64(float64x2_t __a)4549 vreinterpretq_u16_f64 (float64x2_t __a)
4550 {
4551   return (uint16x8_t) __a;
4552 }
4553 
4554 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s8(int8x16_t __a)4555 vreinterpretq_u16_s8 (int8x16_t __a)
4556 {
4557   return (uint16x8_t) __a;
4558 }
4559 
4560 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s16(int16x8_t __a)4561 vreinterpretq_u16_s16 (int16x8_t __a)
4562 {
4563   return (uint16x8_t) __a;
4564 }
4565 
4566 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s32(int32x4_t __a)4567 vreinterpretq_u16_s32 (int32x4_t __a)
4568 {
4569   return (uint16x8_t) __a;
4570 }
4571 
4572 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s64(int64x2_t __a)4573 vreinterpretq_u16_s64 (int64x2_t __a)
4574 {
4575   return (uint16x8_t) __a;
4576 }
4577 
4578 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_f16(float16x8_t __a)4579 vreinterpretq_u16_f16 (float16x8_t __a)
4580 {
4581   return (uint16x8_t) __a;
4582 }
4583 
4584 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_f32(float32x4_t __a)4585 vreinterpretq_u16_f32 (float32x4_t __a)
4586 {
4587   return (uint16x8_t) __a;
4588 }
4589 
4590 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u8(uint8x16_t __a)4591 vreinterpretq_u16_u8 (uint8x16_t __a)
4592 {
4593   return (uint16x8_t) __a;
4594 }
4595 
4596 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u32(uint32x4_t __a)4597 vreinterpretq_u16_u32 (uint32x4_t __a)
4598 {
4599   return (uint16x8_t) __a;
4600 }
4601 
4602 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u64(uint64x2_t __a)4603 vreinterpretq_u16_u64 (uint64x2_t __a)
4604 {
4605   return (uint16x8_t) __a;
4606 }
4607 
4608 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_p8(poly8x16_t __a)4609 vreinterpretq_u16_p8 (poly8x16_t __a)
4610 {
4611   return (uint16x8_t) __a;
4612 }
4613 
4614 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_p16(poly16x8_t __a)4615 vreinterpretq_u16_p16 (poly16x8_t __a)
4616 {
4617   return (uint16x8_t) __a;
4618 }
4619 
4620 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_f16(float16x4_t __a)4621 vreinterpret_u32_f16 (float16x4_t __a)
4622 {
4623   return (uint32x2_t) __a;
4624 }
4625 
4626 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_f64(float64x1_t __a)4627 vreinterpret_u32_f64 (float64x1_t __a)
4628 {
4629   return (uint32x2_t) __a;
4630 }
4631 
4632 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s8(int8x8_t __a)4633 vreinterpret_u32_s8 (int8x8_t __a)
4634 {
4635   return (uint32x2_t) __a;
4636 }
4637 
4638 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s16 (int16x4_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s32 (int32x2_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s64 (int64x1_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_f32 (float32x2_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u8 (uint8x8_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u16 (uint16x4_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u64 (uint64x1_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_p8 (poly8x8_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_p16 (poly16x4_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_f64 (float64x2_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s8 (int8x16_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s16 (int16x8_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s32 (int32x4_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s64 (int64x2_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_f16 (float16x8_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_f32 (float32x4_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u8 (uint8x16_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u16 (uint16x8_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u64 (uint64x2_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_p8 (poly8x16_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_p16 (poly16x8_t __a)
{
  return (uint32x4_t) __a;
}

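/* Illustrative usage sketch (not part of the original header):
   vreinterpret only retypes a vector; the underlying 64 or 128 bits
   are unchanged and typically no instruction is emitted, so the lane
   values seen after a reinterpret depend on endianness.  On a
   little-endian target:

     int16x4_t v = vcreate_s16 (0x0004000300020001);
     uint32x2_t w = vreinterpret_u32_s16 (v);
     // w holds { 0x00020001, 0x00040003 }.  */
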
/* vset_lane  */

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vset_lane_f16 (float16_t __elem, float16x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vset_lane_f32 (float32_t __elem, float32x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vset_lane_f64 (float64_t __elem, float64x1_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vset_lane_p8 (poly8_t __elem, poly8x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vset_lane_p16 (poly16_t __elem, poly16x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vset_lane_s8 (int8_t __elem, int8x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vset_lane_s16 (int16_t __elem, int16x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vset_lane_s32 (int32_t __elem, int32x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vset_lane_s64 (int64_t __elem, int64x1_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vset_lane_u8 (uint8_t __elem, uint8x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vset_lane_u16 (uint16_t __elem, uint16x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vset_lane_u32 (uint32_t __elem, uint32x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vset_lane_u64 (uint64_t __elem, uint64x1_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

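/* Illustrative usage sketch (not part of the original header):
   vset_lane returns a copy of __vec with lane __index replaced by
   __elem; __index must be a constant in range for the lane count.

     int32x2_t v = vdup_n_s32 (0);     // { 0, 0 }
     v = vset_lane_s32 (7, v, 1);      // { 0, 7 }  */
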
/* vsetq_lane  */

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vsetq_lane_f16 (float16_t __elem, float16x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsetq_lane_f32 (float32_t __elem, float32x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vsetq_lane_f64 (float64_t __elem, float64x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vsetq_lane_p8 (poly8_t __elem, poly8x16_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vsetq_lane_p16 (poly16_t __elem, poly16x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsetq_lane_s8 (int8_t __elem, int8x16_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsetq_lane_s16 (int16_t __elem, int16x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsetq_lane_s32 (int32_t __elem, int32x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsetq_lane_s64 (int64_t __elem, int64x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsetq_lane_u8 (uint8_t __elem, uint8x16_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsetq_lane_u16 (uint16_t __elem, uint16x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsetq_lane_u32 (uint32_t __elem, uint32x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

#define __GET_LOW(__TYPE) \
  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);  \
  uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0));  \
  return vreinterpret_##__TYPE##_u64 (lo);

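/* __GET_LOW reads the low 64-bit half of a 128-bit vector: it views
   the input as uint64x2_t, extracts lane 0 as a scalar, and
   reinterprets that scalar back to the requested element type.
   __GET_HIGH below does the same with lane 1.  */
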
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vget_low_f16 (float16x8_t __a)
{
  __GET_LOW (f16);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_low_f32 (float32x4_t __a)
{
  __GET_LOW (f32);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vget_low_f64 (float64x2_t __a)
{
  return (float64x1_t) {vgetq_lane_f64 (__a, 0)};
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vget_low_p8 (poly8x16_t __a)
{
  __GET_LOW (p8);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vget_low_p16 (poly16x8_t __a)
{
  __GET_LOW (p16);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vget_low_s8 (int8x16_t __a)
{
  __GET_LOW (s8);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vget_low_s16 (int16x8_t __a)
{
  __GET_LOW (s16);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vget_low_s32 (int32x4_t __a)
{
  __GET_LOW (s32);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vget_low_s64 (int64x2_t __a)
{
  __GET_LOW (s64);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vget_low_u8 (uint8x16_t __a)
{
  __GET_LOW (u8);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vget_low_u16 (uint16x8_t __a)
{
  __GET_LOW (u16);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vget_low_u32 (uint32x4_t __a)
{
  __GET_LOW (u32);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vget_low_u64 (uint64x2_t __a)
{
  return vcreate_u64 (vgetq_lane_u64 (__a, 0));
}

#undef __GET_LOW

#define __GET_HIGH(__TYPE)					\
  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);		\
  uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1));	\
  return vreinterpret_##__TYPE##_u64 (hi);

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vget_high_f16 (float16x8_t __a)
{
  __GET_HIGH (f16);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_high_f32 (float32x4_t __a)
{
  __GET_HIGH (f32);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vget_high_f64 (float64x2_t __a)
{
  __GET_HIGH (f64);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vget_high_p8 (poly8x16_t __a)
{
  __GET_HIGH (p8);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vget_high_p16 (poly16x8_t __a)
{
  __GET_HIGH (p16);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vget_high_s8 (int8x16_t __a)
{
  __GET_HIGH (s8);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vget_high_s16 (int16x8_t __a)
{
  __GET_HIGH (s16);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vget_high_s32 (int32x4_t __a)
{
  __GET_HIGH (s32);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vget_high_s64 (int64x2_t __a)
{
  __GET_HIGH (s64);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vget_high_u8 (uint8x16_t __a)
{
  __GET_HIGH (u8);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vget_high_u16 (uint16x8_t __a)
{
  __GET_HIGH (u16);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vget_high_u32 (uint32x4_t __a)
{
  __GET_HIGH (u32);
}

#undef __GET_HIGH

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vget_high_u64 (uint64x2_t __a)
{
  return vcreate_u64 (vgetq_lane_u64 (__a, 1));
}

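/* Illustrative usage sketch (not part of the original header):
   splitting a 128-bit vector into its two 64-bit halves.

     int16x8_t q  = vdupq_n_s16 (3);
     int16x4_t lo = vget_low_s16 (q);    // lanes 0..3
     int16x4_t hi = vget_high_s16 (q);   // lanes 4..7  */
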
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcombine_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vcombine_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcombine_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcombine_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_combinedi (__a[0], __b[0]);
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vcombine_f16 (float16x4_t __a, float16x4_t __b)
{
  return __builtin_aarch64_combinev4hf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcombine_f32 (float32x2_t __a, float32x2_t __b)
{
  return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
						     (int8x8_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
						     (int16x4_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
						     (int32x2_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcombine_f64 (float64x1_t __a, float64x1_t __b)
{
  return __builtin_aarch64_combinedf (__a[0], __b[0]);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
						     (int8x8_t) __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
{
  return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
						     (int16x4_t) __b);
}

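/* Illustrative usage sketch (not part of the original header):
   vcombine is the inverse of the vget_low/vget_high pair: __a becomes
   the low lanes and __b the high lanes of the 128-bit result.

     uint8x16_t x = vdupq_n_u8 (1);
     uint8x16_t q = vcombine_u8 (vget_low_u8 (x), vget_high_u8 (x));
     // q has the same contents as x.  */
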
/* Start of temporary inline asm implementations.  */

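/* Note on the asm pattern used below: "=w" allocates the result in an
   Advanced SIMD/FP register, a "0" input constraint ties an
   accumulator operand to that same register (matching instructions
   such as SABA, which read and write their destination), and plain
   "w" inputs may live in any SIMD/FP register.  Nothing else is
   clobbered.  */
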
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("saba %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("saba %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
{
  int32x2_t result;
  __asm__ ("saba %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint8x8_t result;
  __asm__ ("uaba %0.8b,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint16x4_t result;
  __asm__ ("uaba %0.4h,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint32x2_t result;
  __asm__ ("uaba %0.2s,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

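/* Semantics sketch (informal): vaba* is absolute-difference-and-
   accumulate, per lane roughly

       result[i] = a[i] + |b[i] - c[i]|

   with wrapping (non-saturating) addition.  */
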
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
{
  int16x8_t result;
  __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
{
  int32x4_t result;
  __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
{
  int64x2_t result;
  __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
{
  uint16x8_t result;
  __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
{
  uint32x4_t result;
  __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
{
  uint64x2_t result;
  __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

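/* Semantics sketch (informal): the _high (SABAL2/UABAL2) forms widen
   the upper halves of b and c and accumulate, e.g. for
   vabal_high_s8:

       result[i] = a[i] + |b[i + 8] - c[i + 8]|,  i = 0..7.  */
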
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
{
  int16x8_t result;
  __asm__ ("sabal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
{
  int32x4_t result;
  __asm__ ("sabal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
{
  int64x2_t result;
  __asm__ ("sabal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint16x8_t result;
  __asm__ ("uabal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint32x4_t result;
  __asm__ ("uabal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint64x2_t result;
  __asm__ ("uabal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
{
  int8x16_t result;
  __asm__ ("saba %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
{
  int16x8_t result;
  __asm__ ("saba %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
{
  int32x4_t result;
  __asm__ ("saba %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
{
  uint8x16_t result;
  __asm__ ("uaba %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint16x8_t result;
  __asm__ ("uaba %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint32x4_t result;
  __asm__ ("uaba %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vabd_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fabd %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vabd_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("sabd %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vabd_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("sabd %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vabd_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("sabd %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vabd_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uabd %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vabd_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uabd %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vabd_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uabd %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

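/* Semantics sketch (informal): vabd* is the per-lane absolute
   difference |a[i] - b[i]| (FABD for the float forms); the vabdl*
   variants further down additionally widen the result, and
   vabdl_high* do so for the upper halves of their inputs.  */
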
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vabdd_f64 (float64_t a, float64_t b)
{
  float64_t result;
  __asm__ ("fabd %d0, %d1, %d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabdl_high_s8 (int8x16_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabdl_high_s16 (int16x8_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabdl_high_s32 (int32x4_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabdl_s8 (int8x8_t a, int8x8_t b)
{
  int16x8_t result;
  __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabdl_s16 (int16x4_t a, int16x4_t b)
{
  int32x4_t result;
  __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabdl_s32 (int32x2_t a, int32x2_t b)
{
  int64x2_t result;
  __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabdl_u8 (uint8x8_t a, uint8x8_t b)
{
  uint16x8_t result;
  __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabdl_u16 (uint16x4_t a, uint16x4_t b)
{
  uint32x4_t result;
  __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabdl_u32 (uint32x2_t a, uint32x2_t b)
{
  uint64x2_t result;
  __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vabdq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vabdq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fabd %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabdq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("sabd %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabdq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("sabd %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabdq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("sabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vabdq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uabd %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabdq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uabd %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabdq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vabds_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("fabd %s0, %s1, %s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddlv_s8 (int8x8_t a)
{
  int16_t result;
  __asm__ ("saddlv %h0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddlv_s16 (int16x4_t a)
{
  int32_t result;
  __asm__ ("saddlv %s0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddlv_u8 (uint8x8_t a)
{
  uint16_t result;
  __asm__ ("uaddlv %h0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddlv_u16 (uint16x4_t a)
{
  uint32_t result;
  __asm__ ("uaddlv %s0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddlvq_s8 (int8x16_t a)
{
  int16_t result;
  __asm__ ("saddlv %h0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddlvq_s16 (int16x8_t a)
{
  int32_t result;
  __asm__ ("saddlv %s0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddlvq_s32 (int32x4_t a)
{
  int64_t result;
  __asm__ ("saddlv %d0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddlvq_u8 (uint8x16_t a)
{
  uint16_t result;
  __asm__ ("uaddlv %h0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddlvq_u16 (uint16x8_t a)
{
  uint32_t result;
  __asm__ ("uaddlv %s0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddlvq_u32 (uint32x4_t a)
{
  uint64_t result;
  __asm__ ("uaddlv %d0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

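/* Illustrative usage sketch (not part of the original header):
   vaddlv* (SADDLV/UADDLV) sums every lane into one widened scalar.

     uint8x8_t v = vdup_n_u8 (10);
     uint16_t sum = vaddlv_u8 (v);   // sum == 80  */
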
#define vcopyq_lane_f32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t c_ = (c);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_f64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t c_ = (c);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t c_ = (c);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t c_ = (c);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t c_ = (c);                                              \
       int8x16_t a_ = (a);                                              \
       int8x16_t result;                                                \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t c_ = (c);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t c_ = (c);                                             \
       uint8x16_t a_ = (a);                                             \
       uint8x16_t result;                                               \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t a_ = (a);                                             \
       uint16x8_t result;                                               \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t c_ = (c);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

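/* Usage note (informal): vcopyq_lane_* copies lane D of vector C into
   lane B of vector A; B and D must be integer constant expressions,
   since they feed the "i" (immediate) asm constraints.  With
   hypothetical int32x4_t values a and c:

     int32x4_t r = vcopyq_lane_s32 (a, 0, c, 3);
     // r equals a with lane 0 replaced by lane 3 of c.  */
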
#define vcvt_n_f32_s32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t a_ = (a);                                              \
       float32x2_t result;                                              \
       __asm__ ("scvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvt_n_f32_u32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t a_ = (a);                                             \
       float32x2_t result;                                              \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvt_n_s32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t a_ = (a);                                            \
       int32x2_t result;                                                \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvt_n_u32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t a_ = (a);                                            \
       uint32x2_t result;                                               \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

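/* Semantics sketch (informal): the _n_ conversions are fixed-point
   with b fractional bits, b a constant in 1..32 for these 32-bit
   forms: vcvt_n_f32_s32 (a, b) yields roughly a[i] / 2^b per lane,
   while vcvt_n_s32_f32 (a, b) yields a[i] * 2^b truncated toward
   zero.  */
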
#define vcvtd_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64_t a_ = (a);                                                \
       float64_t result;                                                \
       __asm__ ("scvtf %d0,%d1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtd_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64_t a_ = (a);                                               \
       float64_t result;                                                \
       __asm__ ("ucvtf %d0,%d1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtd_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t a_ = (a);                                              \
       int64_t result;                                                  \
       __asm__ ("fcvtzs %d0,%d1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtd_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t a_ = (a);                                              \
       uint64_t result;                                                 \
       __asm__ ("fcvtzu %d0,%d1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       float32x4_t result;                                              \
       __asm__ ("scvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       float32x4_t result;                                              \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6147 
6148 #define vcvtq_n_f64_s64(a, b)                                           \
6149   __extension__                                                         \
6150     ({                                                                  \
6151        int64x2_t a_ = (a);                                              \
6152        float64x2_t result;                                              \
6153        __asm__ ("scvtf %0.2d, %1.2d, #%2"                               \
6154                 : "=w"(result)                                          \
6155                 : "w"(a_), "i"(b)                                       \
6156                 : /* No clobbers */);                                   \
6157        result;                                                          \
6158      })
6159 
6160 #define vcvtq_n_f64_u64(a, b)                                           \
6161   __extension__                                                         \
6162     ({                                                                  \
6163        uint64x2_t a_ = (a);                                             \
6164        float64x2_t result;                                              \
6165        __asm__ ("ucvtf %0.2d, %1.2d, #%2"                               \
6166                 : "=w"(result)                                          \
6167                 : "w"(a_), "i"(b)                                       \
6168                 : /* No clobbers */);                                   \
6169        result;                                                          \
6170      })
6171 
6172 #define vcvtq_n_s32_f32(a, b)                                           \
6173   __extension__                                                         \
6174     ({                                                                  \
6175        float32x4_t a_ = (a);                                            \
6176        int32x4_t result;                                                \
6177        __asm__ ("fcvtzs %0.4s, %1.4s, #%2"                              \
6178                 : "=w"(result)                                          \
6179                 : "w"(a_), "i"(b)                                       \
6180                 : /* No clobbers */);                                   \
6181        result;                                                          \
6182      })
6183 
6184 #define vcvtq_n_s64_f64(a, b)                                           \
6185   __extension__                                                         \
6186     ({                                                                  \
6187        float64x2_t a_ = (a);                                            \
6188        int64x2_t result;                                                \
6189        __asm__ ("fcvtzs %0.2d, %1.2d, #%2"                              \
6190                 : "=w"(result)                                          \
6191                 : "w"(a_), "i"(b)                                       \
6192                 : /* No clobbers */);                                   \
6193        result;                                                          \
6194      })
6195 
6196 #define vcvtq_n_u32_f32(a, b)                                           \
6197   __extension__                                                         \
6198     ({                                                                  \
6199        float32x4_t a_ = (a);                                            \
6200        uint32x4_t result;                                               \
6201        __asm__ ("fcvtzu %0.4s, %1.4s, #%2"                              \
6202                 : "=w"(result)                                          \
6203                 : "w"(a_), "i"(b)                                       \
6204                 : /* No clobbers */);                                   \
6205        result;                                                          \
6206      })
6207 
6208 #define vcvtq_n_u64_f64(a, b)                                           \
6209   __extension__                                                         \
6210     ({                                                                  \
6211        float64x2_t a_ = (a);                                            \
6212        uint64x2_t result;                                               \
6213        __asm__ ("fcvtzu %0.2d, %1.2d, #%2"                              \
6214                 : "=w"(result)                                          \
6215                 : "w"(a_), "i"(b)                                       \
6216                 : /* No clobbers */);                                   \
6217        result;                                                          \
6218      })
6219 
6220 #define vcvts_n_f32_s32(a, b)                                           \
6221   __extension__                                                         \
6222     ({                                                                  \
6223        int32_t a_ = (a);                                                \
6224        float32_t result;                                                \
6225        __asm__ ("scvtf %s0,%s1,%2"                                      \
6226                 : "=w"(result)                                          \
6227                 : "w"(a_), "i"(b)                                       \
6228                 : /* No clobbers */);                                   \
6229        result;                                                          \
6230      })
6231 
6232 #define vcvts_n_f32_u32(a, b)                                           \
6233   __extension__                                                         \
6234     ({                                                                  \
6235        uint32_t a_ = (a);                                               \
6236        float32_t result;                                                \
6237        __asm__ ("ucvtf %s0,%s1,%2"                                      \
6238                 : "=w"(result)                                          \
6239                 : "w"(a_), "i"(b)                                       \
6240                 : /* No clobbers */);                                   \
6241        result;                                                          \
6242      })
6243 
6244 #define vcvts_n_s32_f32(a, b)                                           \
6245   __extension__                                                         \
6246     ({                                                                  \
6247        float32_t a_ = (a);                                              \
6248        int32_t result;                                                  \
6249        __asm__ ("fcvtzs %s0,%s1,%2"                                     \
6250                 : "=w"(result)                                          \
6251                 : "w"(a_), "i"(b)                                       \
6252                 : /* No clobbers */);                                   \
6253        result;                                                          \
6254      })
6255 
6256 #define vcvts_n_u32_f32(a, b)                                           \
6257   __extension__                                                         \
6258     ({                                                                  \
6259        float32_t a_ = (a);                                              \
6260        uint32_t result;                                                 \
6261        __asm__ ("fcvtzu %s0,%s1,%2"                                     \
6262                 : "=w"(result)                                          \
6263                 : "w"(a_), "i"(b)                                       \
6264                 : /* No clobbers */);                                   \
6265        result;                                                          \
6266      })
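/* Illustrative usage sketch (an addition, not part of the original
   header): the second argument of the vcvt*_n_* forms is the number of
   fractional bits and must be a compile-time constant in the
   instruction's immediate range (1..32 for the 32-bit forms, 1..64 for
   the 64-bit forms).  For example, with 8 fractional bits, 1.5f
   converts to the fixed-point value 384 (1.5 * 2^8), and converting
   back recovers 1.5f:

     int32x2_t fix  = vcvt_n_s32_f32 (vdup_n_f32 (1.5f), 8);
     float32x2_t fp = vcvt_n_f32_s32 (fix, 8);
*/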
6267 
6268 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6269 vcvtx_f32_f64 (float64x2_t a)
6270 {
6271   float32x2_t result;
6272   __asm__ ("fcvtxn %0.2s,%1.2d"
6273            : "=w"(result)
6274            : "w"(a)
6275            : /* No clobbers */);
6276   return result;
6277 }
6278 
6279 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6280 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
6281 {
6282   float32x4_t result;
6283   __asm__ ("fcvtxn2 %0.4s,%1.2d"
6284            : "=w"(result)
6285            : "w" (b), "0"(a)
6286            : /* No clobbers */);
6287   return result;
6288 }
6289 
6290 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
6291 vcvtxd_f32_f64 (float64_t a)
6292 {
6293   float32_t result;
6294   __asm__ ("fcvtxn %s0,%d1"
6295            : "=w"(result)
6296            : "w"(a)
6297            : /* No clobbers */);
6298   return result;
6299 }
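/* Illustrative note (not part of the original header): FCVTXN narrows
   with the "round to odd" mode, which avoids double-rounding errors if
   the result is rounded again to a narrower format.  A minimal sketch:

     float64x2_t d   = vdupq_n_f64 (1.0 / 3.0);
     float32x2_t lo  = vcvtx_f32_f64 (d);            // lower half
     float32x4_t all = vcvtx_high_f32_f64 (lo, d);   // append upper half
*/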
6300 
6301 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6302 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6303 {
6304   float32x2_t result;
6305   float32x2_t t1;
6306   __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
6307            : "=w"(result), "=w"(t1)
6308            : "0"(a), "w"(b), "w"(c)
6309            : /* No clobbers */);
6310   return result;
6311 }
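/* Added note: the float vmla*_n variants here are deliberately emitted
   as a separate FMUL followed by FADD rather than a fused FMLA,
   preserving the unfused multiply-then-add semantics of vmla.  */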
6312 
6313 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6314 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
6315 {
6316   int16x4_t result;
6317   __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6318            : "=w"(result)
6319            : "0"(a), "w"(b), "x"(c)
6320            : /* No clobbers */);
6321   return result;
6322 }
6323 
6324 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6325 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
6326 {
6327   int32x2_t result;
6328   __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6329            : "=w"(result)
6330            : "0"(a), "w"(b), "w"(c)
6331            : /* No clobbers */);
6332   return result;
6333 }
6334 
6335 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6336 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
6337 {
6338   uint16x4_t result;
6339   __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6340            : "=w"(result)
6341            : "0"(a), "w"(b), "x"(c)
6342            : /* No clobbers */);
6343   return result;
6344 }
6345 
6346 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6347 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
6348 {
6349   uint32x2_t result;
6350   __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6351            : "=w"(result)
6352            : "0"(a), "w"(b), "w"(c)
6353            : /* No clobbers */);
6354   return result;
6355 }
6356 
6357 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6358 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
6359 {
6360   int8x8_t result;
6361   __asm__ ("mla %0.8b, %2.8b, %3.8b"
6362            : "=w"(result)
6363            : "0"(a), "w"(b), "w"(c)
6364            : /* No clobbers */);
6365   return result;
6366 }
6367 
6368 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6369 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6370 {
6371   int16x4_t result;
6372   __asm__ ("mla %0.4h, %2.4h, %3.4h"
6373            : "=w"(result)
6374            : "0"(a), "w"(b), "w"(c)
6375            : /* No clobbers */);
6376   return result;
6377 }
6378 
6379 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6380 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6381 {
6382   int32x2_t result;
6383   __asm__ ("mla %0.2s, %2.2s, %3.2s"
6384            : "=w"(result)
6385            : "0"(a), "w"(b), "w"(c)
6386            : /* No clobbers */);
6387   return result;
6388 }
6389 
6390 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6391 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
6392 {
6393   uint8x8_t result;
6394   __asm__ ("mla %0.8b, %2.8b, %3.8b"
6395            : "=w"(result)
6396            : "0"(a), "w"(b), "w"(c)
6397            : /* No clobbers */);
6398   return result;
6399 }
6400 
6401 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6402 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
6403 {
6404   uint16x4_t result;
6405   __asm__ ("mla %0.4h, %2.4h, %3.4h"
6406            : "=w"(result)
6407            : "0"(a), "w"(b), "w"(c)
6408            : /* No clobbers */);
6409   return result;
6410 }
6411 
6412 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6413 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
6414 {
6415   uint32x2_t result;
6416   __asm__ ("mla %0.2s, %2.2s, %3.2s"
6417            : "=w"(result)
6418            : "0"(a), "w"(b), "w"(c)
6419            : /* No clobbers */);
6420   return result;
6421 }
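/* Illustrative usage sketch (an addition): the vmla* family computes
   a + b * c per lane, with the _n forms broadcasting the scalar c:

     int16x4_t acc = vdup_n_s16 (10);
     int16x4_t v   = vdup_n_s16 (3);
     int16x4_t r   = vmla_n_s16 (acc, v, 2);   // each lane: 10 + 3*2 = 16
*/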
6422 
6423 #define vmlal_high_lane_s16(a, b, c, d)                                 \
6424   __extension__                                                         \
6425     ({                                                                  \
6426        int16x4_t c_ = (c);                                              \
6427        int16x8_t b_ = (b);                                              \
6428        int32x4_t a_ = (a);                                              \
6429        int32x4_t result;                                                \
6430        __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
6431                 : "=w"(result)                                          \
6432                 : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
6433                 : /* No clobbers */);                                   \
6434        result;                                                          \
6435      })
6436 
6437 #define vmlal_high_lane_s32(a, b, c, d)                                 \
6438   __extension__                                                         \
6439     ({                                                                  \
6440        int32x2_t c_ = (c);                                              \
6441        int32x4_t b_ = (b);                                              \
6442        int64x2_t a_ = (a);                                              \
6443        int64x2_t result;                                                \
6444        __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
6445                 : "=w"(result)                                          \
6446                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
6447                 : /* No clobbers */);                                   \
6448        result;                                                          \
6449      })
6450 
6451 #define vmlal_high_lane_u16(a, b, c, d)                                 \
6452   __extension__                                                         \
6453     ({                                                                  \
6454        uint16x4_t c_ = (c);                                             \
6455        uint16x8_t b_ = (b);                                             \
6456        uint32x4_t a_ = (a);                                             \
6457        uint32x4_t result;                                               \
6458        __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
6459                 : "=w"(result)                                          \
6460                 : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
6461                 : /* No clobbers */);                                   \
6462        result;                                                          \
6463      })
6464 
6465 #define vmlal_high_lane_u32(a, b, c, d)                                 \
6466   __extension__                                                         \
6467     ({                                                                  \
6468        uint32x2_t c_ = (c);                                             \
6469        uint32x4_t b_ = (b);                                             \
6470        uint64x2_t a_ = (a);                                             \
6471        uint64x2_t result;                                               \
6472        __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
6473                 : "=w"(result)                                          \
6474                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
6475                 : /* No clobbers */);                                   \
6476        result;                                                          \
6477      })
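/* Added note: the _lane variants above take the multiplicand lane from
   a 64-bit vector, while the _laneq variants that follow index into a
   full 128-bit vector; in both, d must be a constant lane index valid
   for the indexed type.  */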
6478 
6479 #define vmlal_high_laneq_s16(a, b, c, d)                                \
6480   __extension__                                                         \
6481     ({                                                                  \
6482        int16x8_t c_ = (c);                                              \
6483        int16x8_t b_ = (b);                                              \
6484        int32x4_t a_ = (a);                                              \
6485        int32x4_t result;                                                \
6486        __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
6487                 : "=w"(result)                                          \
6488                 : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
6489                 : /* No clobbers */);                                   \
6490        result;                                                          \
6491      })
6492 
6493 #define vmlal_high_laneq_s32(a, b, c, d)                                \
6494   __extension__                                                         \
6495     ({                                                                  \
6496        int32x4_t c_ = (c);                                              \
6497        int32x4_t b_ = (b);                                              \
6498        int64x2_t a_ = (a);                                              \
6499        int64x2_t result;                                                \
6500        __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
6501                 : "=w"(result)                                          \
6502                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
6503                 : /* No clobbers */);                                   \
6504        result;                                                          \
6505      })
6506 
6507 #define vmlal_high_laneq_u16(a, b, c, d)                                \
6508   __extension__                                                         \
6509     ({                                                                  \
6510        uint16x8_t c_ = (c);                                             \
6511        uint16x8_t b_ = (b);                                             \
6512        uint32x4_t a_ = (a);                                             \
6513        uint32x4_t result;                                               \
6514        __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
6515                 : "=w"(result)                                          \
6516                 : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
6517                 : /* No clobbers */);                                   \
6518        result;                                                          \
6519      })
6520 
6521 #define vmlal_high_laneq_u32(a, b, c, d)                                \
6522   __extension__                                                         \
6523     ({                                                                  \
6524        uint32x4_t c_ = (c);                                             \
6525        uint32x4_t b_ = (b);                                             \
6526        uint64x2_t a_ = (a);                                             \
6527        uint64x2_t result;                                               \
6528        __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
6529                 : "=w"(result)                                          \
6530                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
6531                 : /* No clobbers */);                                   \
6532        result;                                                          \
6533      })
6534 
6535 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6536 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6537 {
6538   int32x4_t result;
6539   __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
6540            : "=w"(result)
6541            : "0"(a), "w"(b), "x"(c)
6542            : /* No clobbers */);
6543   return result;
6544 }
6545 
6546 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6547 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6548 {
6549   int64x2_t result;
6550   __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
6551            : "=w"(result)
6552            : "0"(a), "w"(b), "w"(c)
6553            : /* No clobbers */);
6554   return result;
6555 }
6556 
6557 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6558 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6559 {
6560   uint32x4_t result;
6561   __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
6562            : "=w"(result)
6563            : "0"(a), "w"(b), "x"(c)
6564            : /* No clobbers */);
6565   return result;
6566 }
6567 
6568 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6569 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6570 {
6571   uint64x2_t result;
6572   __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
6573            : "=w"(result)
6574            : "0"(a), "w"(b), "w"(c)
6575            : /* No clobbers */);
6576   return result;
6577 }
6578 
6579 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6580 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
6581 {
6582   int16x8_t result;
6583   __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
6584            : "=w"(result)
6585            : "0"(a), "w"(b), "w"(c)
6586            : /* No clobbers */);
6587   return result;
6588 }
6589 
6590 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6591 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
6592 {
6593   int32x4_t result;
6594   __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
6595            : "=w"(result)
6596            : "0"(a), "w"(b), "w"(c)
6597            : /* No clobbers */);
6598   return result;
6599 }
6600 
6601 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6602 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
6603 {
6604   int64x2_t result;
6605   __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
6606            : "=w"(result)
6607            : "0"(a), "w"(b), "w"(c)
6608            : /* No clobbers */);
6609   return result;
6610 }
6611 
6612 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6613 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
6614 {
6615   uint16x8_t result;
6616   __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
6617            : "=w"(result)
6618            : "0"(a), "w"(b), "w"(c)
6619            : /* No clobbers */);
6620   return result;
6621 }
6622 
6623 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6624 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
6625 {
6626   uint32x4_t result;
6627   __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
6628            : "=w"(result)
6629            : "0"(a), "w"(b), "w"(c)
6630            : /* No clobbers */);
6631   return result;
6632 }
6633 
6634 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6635 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
6636 {
6637   uint64x2_t result;
6638   __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
6639            : "=w"(result)
6640            : "0"(a), "w"(b), "w"(c)
6641            : /* No clobbers */);
6642   return result;
6643 }
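/* Illustrative sketch (an addition): the vmlal_high* forms widen the
   upper half of the 128-bit sources and accumulate; e.g. SMLAL2 here
   multiplies the top eight 8-bit lanes:

     int16x8_t acc = vdupq_n_s16 (0);
     int8x16_t x   = vdupq_n_s8 (5);
     int8x16_t y   = vdupq_n_s8 (4);
     acc = vmlal_high_s8 (acc, x, y);   // each lane: 0 + 5*4 = 20
*/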
6644 
6645 #define vmlal_lane_s16(a, b, c, d)                                      \
6646   __extension__                                                         \
6647     ({                                                                  \
6648        int16x4_t c_ = (c);                                              \
6649        int16x4_t b_ = (b);                                              \
6650        int32x4_t a_ = (a);                                              \
6651        int32x4_t result;                                                \
6652        __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]"                            \
6653                 : "=w"(result)                                          \
6654                 : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
6655                 : /* No clobbers */);                                   \
6656        result;                                                          \
6657      })
6658 
6659 #define vmlal_lane_s32(a, b, c, d)                                      \
6660   __extension__                                                         \
6661     ({                                                                  \
6662        int32x2_t c_ = (c);                                              \
6663        int32x2_t b_ = (b);                                              \
6664        int64x2_t a_ = (a);                                              \
6665        int64x2_t result;                                                \
6666        __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]"                            \
6667                 : "=w"(result)                                          \
6668                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
6669                 : /* No clobbers */);                                   \
6670        result;                                                          \
6671      })
6672 
6673 #define vmlal_lane_u16(a, b, c, d)                                      \
6674   __extension__                                                         \
6675     ({                                                                  \
6676        uint16x4_t c_ = (c);                                             \
6677        uint16x4_t b_ = (b);                                             \
6678        uint32x4_t a_ = (a);                                             \
6679        uint32x4_t result;                                               \
6680        __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]"                            \
6681                 : "=w"(result)                                          \
6682                 : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
6683                 : /* No clobbers */);                                   \
6684        result;                                                          \
6685      })
6686 
6687 #define vmlal_lane_u32(a, b, c, d)                                      \
6688   __extension__                                                         \
6689     ({                                                                  \
6690        uint32x2_t c_ = (c);                                             \
6691        uint32x2_t b_ = (b);                                             \
6692        uint64x2_t a_ = (a);                                             \
6693        uint64x2_t result;                                               \
6694        __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
6695                 : "=w"(result)                                          \
6696                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
6697                 : /* No clobbers */);                                   \
6698        result;                                                          \
6699      })
6700 
6701 #define vmlal_laneq_s16(a, b, c, d)                                     \
6702   __extension__                                                         \
6703     ({                                                                  \
6704        int16x8_t c_ = (c);                                              \
6705        int16x4_t b_ = (b);                                              \
6706        int32x4_t a_ = (a);                                              \
6707        int32x4_t result;                                                \
6708        __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
6709                 : "=w"(result)                                          \
6710                 : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
6711                 : /* No clobbers */);                                   \
6712        result;                                                          \
6713      })
6714 
6715 #define vmlal_laneq_s32(a, b, c, d)                                     \
6716   __extension__                                                         \
6717     ({                                                                  \
6718        int32x4_t c_ = (c);                                              \
6719        int32x2_t b_ = (b);                                              \
6720        int64x2_t a_ = (a);                                              \
6721        int64x2_t result;                                                \
6722        __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
6723                 : "=w"(result)                                          \
6724                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
6725                 : /* No clobbers */);                                   \
6726        result;                                                          \
6727      })
6728 
6729 #define vmlal_laneq_u16(a, b, c, d)                                     \
6730   __extension__                                                         \
6731     ({                                                                  \
6732        uint16x8_t c_ = (c);                                             \
6733        uint16x4_t b_ = (b);                                             \
6734        uint32x4_t a_ = (a);                                             \
6735        uint32x4_t result;                                               \
6736        __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
6737                 : "=w"(result)                                          \
6738                 : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
6739                 : /* No clobbers */);                                   \
6740        result;                                                          \
6741      })
6742 
6743 #define vmlal_laneq_u32(a, b, c, d)                                     \
6744   __extension__                                                         \
6745     ({                                                                  \
6746        uint32x4_t c_ = (c);                                             \
6747        uint32x2_t b_ = (b);                                             \
6748        uint64x2_t a_ = (a);                                             \
6749        uint64x2_t result;                                               \
6750        __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
6751                 : "=w"(result)                                          \
6752                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
6753                 : /* No clobbers */);                                   \
6754        result;                                                          \
6755      })
6756 
6757 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6758 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
6759 {
6760   int32x4_t result;
6761   __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
6762            : "=w"(result)
6763            : "0"(a), "w"(b), "x"(c)
6764            : /* No clobbers */);
6765   return result;
6766 }
6767 
6768 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6769 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
6770 {
6771   int64x2_t result;
6772   __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
6773            : "=w"(result)
6774            : "0"(a), "w"(b), "w"(c)
6775            : /* No clobbers */);
6776   return result;
6777 }
6778 
6779 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6780 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
6781 {
6782   uint32x4_t result;
6783   __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
6784            : "=w"(result)
6785            : "0"(a), "w"(b), "x"(c)
6786            : /* No clobbers */);
6787   return result;
6788 }
6789 
6790 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6791 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
6792 {
6793   uint64x2_t result;
6794   __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
6795            : "=w"(result)
6796            : "0"(a), "w"(b), "w"(c)
6797            : /* No clobbers */);
6798   return result;
6799 }
6800 
6801 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6802 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
6803 {
6804   int16x8_t result;
6805   __asm__ ("smlal %0.8h,%2.8b,%3.8b"
6806            : "=w"(result)
6807            : "0"(a), "w"(b), "w"(c)
6808            : /* No clobbers */);
6809   return result;
6810 }
6811 
6812 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6813 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
6814 {
6815   int32x4_t result;
6816   __asm__ ("smlal %0.4s,%2.4h,%3.4h"
6817            : "=w"(result)
6818            : "0"(a), "w"(b), "w"(c)
6819            : /* No clobbers */);
6820   return result;
6821 }
6822 
6823 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6824 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
6825 {
6826   int64x2_t result;
6827   __asm__ ("smlal %0.2d,%2.2s,%3.2s"
6828            : "=w"(result)
6829            : "0"(a), "w"(b), "w"(c)
6830            : /* No clobbers */);
6831   return result;
6832 }
6833 
6834 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6835 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
6836 {
6837   uint16x8_t result;
6838   __asm__ ("umlal %0.8h,%2.8b,%3.8b"
6839            : "=w"(result)
6840            : "0"(a), "w"(b), "w"(c)
6841            : /* No clobbers */);
6842   return result;
6843 }
6844 
6845 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6846 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
6847 {
6848   uint32x4_t result;
6849   __asm__ ("umlal %0.4s,%2.4h,%3.4h"
6850            : "=w"(result)
6851            : "0"(a), "w"(b), "w"(c)
6852            : /* No clobbers */);
6853   return result;
6854 }
6855 
6856 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6857 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
6858 {
6859   uint64x2_t result;
6860   __asm__ ("umlal %0.2d,%2.2s,%3.2s"
6861            : "=w"(result)
6862            : "0"(a), "w"(b), "w"(c)
6863            : /* No clobbers */);
6864   return result;
6865 }
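/* Illustrative sketch (an addition): the non-_high vmlal* forms widen
   all lanes of the 64-bit sources, so a single widening product always
   fits the doubled element width of the accumulator:

     uint32x4_t acc = vdupq_n_u32 (1);
     uint16x4_t x   = vdup_n_u16 (1000);
     uint16x4_t y   = vdup_n_u16 (1000);
     acc = vmlal_u16 (acc, x, y);   // each lane: 1 + 1000 * 1000
*/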
6866 
6867 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6868 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
6869 {
6870   float32x4_t result;
6871   float32x4_t t1;
6872   __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
6873            : "=w"(result), "=w"(t1)
6874            : "0"(a), "w"(b), "w"(c)
6875            : /* No clobbers */);
6876   return result;
6877 }
6878 
6879 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6880 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
6881 {
6882   int16x8_t result;
6883   __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
6884            : "=w"(result)
6885            : "0"(a), "w"(b), "x"(c)
6886            : /* No clobbers */);
6887   return result;
6888 }
6889 
6890 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6891 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
6892 {
6893   int32x4_t result;
6894   __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
6895            : "=w"(result)
6896            : "0"(a), "w"(b), "w"(c)
6897            : /* No clobbers */);
6898   return result;
6899 }
6900 
6901 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6902 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
6903 {
6904   uint16x8_t result;
6905   __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
6906            : "=w"(result)
6907            : "0"(a), "w"(b), "x"(c)
6908            : /* No clobbers */);
6909   return result;
6910 }
6911 
6912 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6913 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
6914 {
6915   uint32x4_t result;
6916   __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
6917            : "=w"(result)
6918            : "0"(a), "w"(b), "w"(c)
6919            : /* No clobbers */);
6920   return result;
6921 }
6922 
6923 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6924 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
6925 {
6926   int8x16_t result;
6927   __asm__ ("mla %0.16b, %2.16b, %3.16b"
6928            : "=w"(result)
6929            : "0"(a), "w"(b), "w"(c)
6930            : /* No clobbers */);
6931   return result;
6932 }
6933 
6934 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6935 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
6936 {
6937   int16x8_t result;
6938   __asm__ ("mla %0.8h, %2.8h, %3.8h"
6939            : "=w"(result)
6940            : "0"(a), "w"(b), "w"(c)
6941            : /* No clobbers */);
6942   return result;
6943 }
6944 
6945 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6946 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
6947 {
6948   int32x4_t result;
6949   __asm__ ("mla %0.4s, %2.4s, %3.4s"
6950            : "=w"(result)
6951            : "0"(a), "w"(b), "w"(c)
6952            : /* No clobbers */);
6953   return result;
6954 }
6955 
6956 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6957 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
6958 {
6959   uint8x16_t result;
6960   __asm__ ("mla %0.16b, %2.16b, %3.16b"
6961            : "=w"(result)
6962            : "0"(a), "w"(b), "w"(c)
6963            : /* No clobbers */);
6964   return result;
6965 }
6966 
6967 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6968 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
6969 {
6970   uint16x8_t result;
6971   __asm__ ("mla %0.8h, %2.8h, %3.8h"
6972            : "=w"(result)
6973            : "0"(a), "w"(b), "w"(c)
6974            : /* No clobbers */);
6975   return result;
6976 }
6977 
6978 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6979 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
6980 {
6981   uint32x4_t result;
6982   __asm__ ("mla %0.4s, %2.4s, %3.4s"
6983            : "=w"(result)
6984            : "0"(a), "w"(b), "w"(c)
6985            : /* No clobbers */);
6986   return result;
6987 }
6988 
6989 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6990 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6991 {
6992   float32x2_t result;
6993   float32x2_t t1;
6994   __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
6995            : "=w"(result), "=w"(t1)
6996            : "0"(a), "w"(b), "w"(c)
6997            : /* No clobbers */);
6998   return result;
6999 }
7000 
7001 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7002 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
7003 {
7004   int16x4_t result;
7005   __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7006            : "=w"(result)
7007            : "0"(a), "w"(b), "x"(c)
7008            : /* No clobbers */);
7009   return result;
7010 }
7011 
7012 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7013 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
7014 {
7015   int32x2_t result;
7016   __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7017            : "=w"(result)
7018            : "0"(a), "w"(b), "w"(c)
7019            : /* No clobbers */);
7020   return result;
7021 }
7022 
7023 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7024 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
7025 {
7026   uint16x4_t result;
7027   __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7028            : "=w"(result)
7029            : "0"(a), "w"(b), "x"(c)
7030            : /* No clobbers */);
7031   return result;
7032 }
7033 
7034 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7035 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
7036 {
7037   uint32x2_t result;
7038   __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7039            : "=w"(result)
7040            : "0"(a), "w"(b), "w"(c)
7041            : /* No clobbers */);
7042   return result;
7043 }
7044 
7045 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7046 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
7047 {
7048   int8x8_t result;
7049   __asm__ ("mls %0.8b,%2.8b,%3.8b"
7050            : "=w"(result)
7051            : "0"(a), "w"(b), "w"(c)
7052            : /* No clobbers */);
7053   return result;
7054 }
7055 
7056 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7057 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
7058 {
7059   int16x4_t result;
7060   __asm__ ("mls %0.4h,%2.4h,%3.4h"
7061            : "=w"(result)
7062            : "0"(a), "w"(b), "w"(c)
7063            : /* No clobbers */);
7064   return result;
7065 }
7066 
7067 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7068 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
7069 {
7070   int32x2_t result;
7071   __asm__ ("mls %0.2s,%2.2s,%3.2s"
7072            : "=w"(result)
7073            : "0"(a), "w"(b), "w"(c)
7074            : /* No clobbers */);
7075   return result;
7076 }
7077 
7078 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7079 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7080 {
7081   uint8x8_t result;
7082   __asm__ ("mls %0.8b,%2.8b,%3.8b"
7083            : "=w"(result)
7084            : "0"(a), "w"(b), "w"(c)
7085            : /* No clobbers */);
7086   return result;
7087 }
7088 
7089 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7090 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7091 {
7092   uint16x4_t result;
7093   __asm__ ("mls %0.4h,%2.4h,%3.4h"
7094            : "=w"(result)
7095            : "0"(a), "w"(b), "w"(c)
7096            : /* No clobbers */);
7097   return result;
7098 }
7099 
7100 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7101 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7102 {
7103   uint32x2_t result;
7104   __asm__ ("mls %0.2s,%2.2s,%3.2s"
7105            : "=w"(result)
7106            : "0"(a), "w"(b), "w"(c)
7107            : /* No clobbers */);
7108   return result;
7109 }
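/* Illustrative sketch (an addition): vmls* is the subtracting
   counterpart, computing a - b * c per lane:

     uint32x2_t acc = vdup_n_u32 (100);
     uint32x2_t v   = vdup_n_u32 (7);
     uint32x2_t r   = vmls_n_u32 (acc, v, 3);   // each lane: 100 - 7*3 = 79
*/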
7110 
7111 #define vmlsl_high_lane_s16(a, b, c, d)                                 \
7112   __extension__                                                         \
7113     ({                                                                  \
7114        int16x4_t c_ = (c);                                              \
7115        int16x8_t b_ = (b);                                              \
7116        int32x4_t a_ = (a);                                              \
7117        int32x4_t result;                                                \
7118        __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
7119                 : "=w"(result)                                          \
7120                 : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
7121                 : /* No clobbers */);                                   \
7122        result;                                                          \
7123      })
7124 
7125 #define vmlsl_high_lane_s32(a, b, c, d)                                 \
7126   __extension__                                                         \
7127     ({                                                                  \
7128        int32x2_t c_ = (c);                                              \
7129        int32x4_t b_ = (b);                                              \
7130        int64x2_t a_ = (a);                                              \
7131        int64x2_t result;                                                \
7132        __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
7133                 : "=w"(result)                                          \
7134                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
7135                 : /* No clobbers */);                                   \
7136        result;                                                          \
7137      })
7138 
7139 #define vmlsl_high_lane_u16(a, b, c, d)                                 \
7140   __extension__                                                         \
7141     ({                                                                  \
7142        uint16x4_t c_ = (c);                                             \
7143        uint16x8_t b_ = (b);                                             \
7144        uint32x4_t a_ = (a);                                             \
7145        uint32x4_t result;                                               \
7146        __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
7147                 : "=w"(result)                                          \
7148                 : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
7149                 : /* No clobbers */);                                   \
7150        result;                                                          \
7151      })
7152 
7153 #define vmlsl_high_lane_u32(a, b, c, d)                                 \
7154   __extension__                                                         \
7155     ({                                                                  \
7156        uint32x2_t c_ = (c);                                             \
7157        uint32x4_t b_ = (b);                                             \
7158        uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
{
  int16x8_t result;
  __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
{
  int32x4_t result;
  __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
{
  int64x2_t result;
  __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
{
  uint16x8_t result;
  __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

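/* Note: each vmlsl_high form above subtracts a widening product of the
   high halves of its narrow operands from the wide accumulator; for
   vmlsl_high_s16, result lane i is a[i] - (int32_t) b[i + 4] * c[i + 4].
   A minimal usage sketch (example_mlsl_all_lanes is a hypothetical
   caller, not part of this header):

     int32x4_t
     example_mlsl_all_lanes (int32x4_t acc, int16x8_t x, int16x8_t y)
     {
       acc = vmlsl_s16 (acc, vget_low_s16 (x), vget_low_s16 (y));
       return vmlsl_high_s16 (acc, x, y);
     }  */
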
#define vmlsl_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
{
  int16x8_t result;
  __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
{
  int32x4_t result;
  __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
{
  int64x2_t result;
  __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint16x8_t result;
  __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

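/* The plain (64-bit input) vmlsl forms widen every lane, so vmlsl_u8
   yields a[i] - (uint16_t) b[i] * c[i] for i in 0..7; the _lane/_laneq
   and _n variants replace one multiplicand with a selected lane or a
   broadcast scalar.  The "x" constraints on the 16-bit-lane variants
   reflect that indexed .h[] multiplies can only name registers v0-v15.
   Sketch (example_mlsl_n is a hypothetical helper):

     uint32x4_t
     example_mlsl_n (uint32x4_t acc, uint16x4_t b, uint16_t k)
     {
       return vmlsl_n_u16 (acc, b, k);   /* acc[i] - (uint32_t) b[i] * k */
     }  */
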
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
{
  float32x4_t result;
  float32x4_t t1;
  __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
{
  int16x8_t result;
  __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
{
  int32x4_t result;
  __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
{
  uint16x8_t result;
  __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
{
  uint32x4_t result;
  __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
{
  int8x16_t result;
  __asm__ ("mls %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
{
  int16x8_t result;
  __asm__ ("mls %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
{
  int32x4_t result;
  __asm__ ("mls %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
{
  uint8x16_t result;
  __asm__ ("mls %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint16x8_t result;
  __asm__ ("mls %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint32x4_t result;
  __asm__ ("mls %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

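/* vmlsq_* are the non-widening quad-register multiply-subtract forms.
   Note that vmlsq_n_f32 above is emitted as a separate fmul and fsub
   rather than a single fused fmls, so the product is rounded before
   the subtraction.  A sketch with a hypothetical helper:

     float32x4_t
     example_scale_down (float32x4_t acc, float32x4_t v, float32_t k)
     {
       return vmlsq_n_f32 (acc, v, k);   /* acc - v * k, per lane */
     }  */
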
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovl_high_s8 (int8x16_t a)
{
  int16x8_t result;
  __asm__ ("sshll2 %0.8h,%1.16b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovl_high_s16 (int16x8_t a)
{
  int32x4_t result;
  __asm__ ("sshll2 %0.4s,%1.8h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmovl_high_s32 (int32x4_t a)
{
  int64x2_t result;
  __asm__ ("sshll2 %0.2d,%1.4s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovl_high_u8 (uint8x16_t a)
{
  uint16x8_t result;
  __asm__ ("ushll2 %0.8h,%1.16b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovl_high_u16 (uint16x8_t a)
{
  uint32x4_t result;
  __asm__ ("ushll2 %0.4s,%1.8h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmovl_high_u32 (uint32x4_t a)
{
  uint64x2_t result;
  __asm__ ("ushll2 %0.2d,%1.4s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovl_s8 (int8x8_t a)
{
  int16x8_t result;
  __asm__ ("sshll %0.8h,%1.8b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovl_s16 (int16x4_t a)
{
  int32x4_t result;
  __asm__ ("sshll %0.4s,%1.4h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmovl_s32 (int32x2_t a)
{
  int64x2_t result;
  __asm__ ("sshll %0.2d,%1.2s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovl_u8 (uint8x8_t a)
{
  uint16x8_t result;
  __asm__ ("ushll %0.8h,%1.8b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovl_u16 (uint16x4_t a)
{
  uint32x4_t result;
  __asm__ ("ushll %0.4s,%1.4h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmovl_u32 (uint32x2_t a)
{
  uint64x2_t result;
  __asm__ ("ushll %0.2d,%1.2s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

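/* The vmovl family widens each lane by sign- or zero-extension
   (implemented as a shift-left-long by #0); vmovl_high_* read the top
   half of a 128-bit input directly, avoiding a separate vget_high.
   Illustrative pairing (example_widen is a hypothetical helper):

     void
     example_widen (uint8x16_t v, uint16x8_t *lo, uint16x8_t *hi)
     {
       *lo = vmovl_u8 (vget_low_u8 (v));   /* lanes 0..7  */
       *hi = vmovl_high_u8 (v);            /* lanes 8..15 */
     }  */
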
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.16b,%1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.8h,%1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.4s,%1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.16b,%1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.8h,%1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.4s,%1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmovn_s16 (int16x8_t a)
{
  int8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmovn_s32 (int32x4_t a)
{
  int16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmovn_s64 (int64x2_t a)
{
  int32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmovn_u16 (uint16x8_t a)
{
  uint8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmovn_u32 (uint32x4_t a)
{
  uint16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmovn_u64 (uint64x2_t a)
{
  uint32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

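/* vmovn truncates each wide lane to half width (xtn), discarding the
   upper bits without saturation; vmovn_high_* deposit the truncated
   lanes into the top half of the destination (xtn2).  Narrowing two
   quad vectors into one (example_narrow is a hypothetical helper):

     uint8x16_t
     example_narrow (uint16x8_t a, uint16x8_t b)
     {
       return vmovn_high_u16 (vmovn_u16 (a), b);
     }  */
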
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_n_f32 (float32x2_t a, float32_t b)
{
  float32x2_t result;
  __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_n_u16 (uint16x4_t a, uint16_t b)
{
  uint16x4_t result;
  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_n_u32 (uint32x2_t a, uint32_t b)
{
  uint32x2_t result;
  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

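/* vmul_n_* multiply every lane by a scalar placed in lane 0 of a
   vector register, e.g. (example_scale is a hypothetical helper):

     float32x2_t
     example_scale (float32x2_t a, float32_t k)
     {
       return vmul_n_f32 (a, k);   /* { a[0]*k, a[1]*k } */
     }  */
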
#define vmull_high_lane_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_s16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_s32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_high_n_s16 (int16x8_t a, int16_t b)
{
  int32x4_t result;
  __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_high_n_s32 (int32x4_t a, int32_t b)
{
  int64x2_t result;
  __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_high_n_u16 (uint16x8_t a, uint16_t b)
{
  uint32x4_t result;
  __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_high_n_u32 (uint32x4_t a, uint32_t b)
{
  uint64x2_t result;
  __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmull_high_p8 (poly8x16_t a, poly8x16_t b)
{
  poly16x8_t result;
  __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmull_high_s8 (int8x16_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_high_s16 (int16x8_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_high_s32 (int32x4_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmull_high_u8 (uint8x16_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_high_u16 (uint16x8_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_high_u32 (uint32x4_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

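/* vmull_high_* form a full double-width product of the high halves of
   their 128-bit inputs (so vmull_high_u16 lane i is
   (uint32_t) a[i + 4] * b[i + 4]); vmull_high_p8 is the polynomial
   (carry-less) variant.  Covering all lanes of a pair of quad inputs
   (example_mull_all_lanes is a hypothetical helper):

     uint32x4_t
     example_mull_all_lanes (uint16x8_t a, uint16x8_t b, uint32x4_t *hi)
     {
       *hi = vmull_high_u16 (a, b);
       return vmull_u16 (vget_low_u16 (a), vget_low_u16 (b));
     }  */
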
#define vmull_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_s16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_s32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_n_s16 (int16x4_t a, int16_t b)
{
  int32x4_t result;
  __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_n_s32 (int32x2_t a, int32_t b)
{
  int64x2_t result;
  __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_n_u16 (uint16x4_t a, uint16_t b)
{
  uint32x4_t result;
  __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_n_u32 (uint32x2_t a, uint32_t b)
{
  uint64x2_t result;
  __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmull_p8 (poly8x8_t a, poly8x8_t b)
{
  poly16x8_t result;
  __asm__ ("pmull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmull_s8 (int8x8_t a, int8x8_t b)
{
  int16x8_t result;
  __asm__ ("smull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_s16 (int16x4_t a, int16x4_t b)
{
  int32x4_t result;
  __asm__ ("smull %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_s32 (int32x2_t a, int32x2_t b)
{
  int64x2_t result;
  __asm__ ("smull %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmull_u8 (uint8x8_t a, uint8x8_t b)
{
  uint16x8_t result;
  __asm__ ("umull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_u16 (uint16x4_t a, uint16x4_t b)
{
  uint32x4_t result;
  __asm__ ("umull %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_u32 (uint32x2_t a, uint32x2_t b)
{
  uint64x2_t result;
  __asm__ ("umull %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

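/* The plain vmull forms produce a double-width product of every lane
   of their 64-bit inputs; the result cannot overflow, which is why no
   saturating variant is needed.  vmull_p8 multiplies 8-bit polynomials
   over GF(2) (carry-less multiplication).  Sketch (example_widening_mul
   is a hypothetical helper):

     int32x4_t
     example_widening_mul (int16x4_t a, int16x4_t b)
     {
       return vmull_s16 (a, b);   /* (int32_t) a[i] * b[i] */
     }  */
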
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_n_f32 (float32x4_t a, float32_t b)
{
  float32x4_t result;
  __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_n_f64 (float64x2_t a, float64_t b)
{
  float64x2_t result;
  __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_n_u16 (uint16x8_t a, uint16_t b)
{
  uint16x8_t result;
  __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_n_u32 (uint32x4_t a, uint32_t b)
{
  uint32x4_t result;
  __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

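/* vmulq_n_* are the quad-register by-scalar multiplies; the f64 form
   scales both double-precision lanes by one scalar, e.g.
   (example_scaleq is a hypothetical helper):

     float64x2_t
     example_scaleq (float64x2_t v, float64_t k)
     {
       return vmulq_n_f64 (v, k);
     }  */
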
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmvn_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmvn_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmvn_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmvn_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmvn_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmvn_u16 (uint16x4_t a)
{
  uint16x4_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmvn_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

8589 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmvnq_p8(poly8x16_t a)8590 vmvnq_p8 (poly8x16_t a)
8591 {
8592   poly8x16_t result;
8593   __asm__ ("mvn %0.16b,%1.16b"
8594            : "=w"(result)
8595            : "w"(a)
8596            : /* No clobbers */);
8597   return result;
8598 }
8599 
8600 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmvnq_s8(int8x16_t a)8601 vmvnq_s8 (int8x16_t a)
8602 {
8603   int8x16_t result;
8604   __asm__ ("mvn %0.16b,%1.16b"
8605            : "=w"(result)
8606            : "w"(a)
8607            : /* No clobbers */);
8608   return result;
8609 }
8610 
8611 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmvnq_s16(int16x8_t a)8612 vmvnq_s16 (int16x8_t a)
8613 {
8614   int16x8_t result;
8615   __asm__ ("mvn %0.16b,%1.16b"
8616            : "=w"(result)
8617            : "w"(a)
8618            : /* No clobbers */);
8619   return result;
8620 }
8621 
8622 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmvnq_s32(int32x4_t a)8623 vmvnq_s32 (int32x4_t a)
8624 {
8625   int32x4_t result;
8626   __asm__ ("mvn %0.16b,%1.16b"
8627            : "=w"(result)
8628            : "w"(a)
8629            : /* No clobbers */);
8630   return result;
8631 }
8632 
8633 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmvnq_u8(uint8x16_t a)8634 vmvnq_u8 (uint8x16_t a)
8635 {
8636   uint8x16_t result;
8637   __asm__ ("mvn %0.16b,%1.16b"
8638            : "=w"(result)
8639            : "w"(a)
8640            : /* No clobbers */);
8641   return result;
8642 }
8643 
8644 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmvnq_u16(uint16x8_t a)8645 vmvnq_u16 (uint16x8_t a)
8646 {
8647   uint16x8_t result;
8648   __asm__ ("mvn %0.16b,%1.16b"
8649            : "=w"(result)
8650            : "w"(a)
8651            : /* No clobbers */);
8652   return result;
8653 }
8654 
8655 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmvnq_u32(uint32x4_t a)8656 vmvnq_u32 (uint32x4_t a)
8657 {
8658   uint32x4_t result;
8659   __asm__ ("mvn %0.16b,%1.16b"
8660            : "=w"(result)
8661            : "w"(a)
8662            : /* No clobbers */);
8663   return result;
8664 }
8665 
8666 
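/* vpadal/vpadalq: pairwise add adjacent elements of the second operand,
   widen the sums, and accumulate them onto the first operand.  The "0"(a)
   constraint ties the accumulator to the output register, matching the
   read-modify-write semantics of SADALP/UADALP.  Sketch (helper name
   illustrative):

     int32x2_t accumulate_pairs (int32x2_t acc, int16x4_t v)
     {
       return vpadal_s16 (acc, v);   // acc[i] += v[2*i] + v[2*i+1]
     }
*/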
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadal_s8 (int16x4_t a, int8x8_t b)
{
  int16x4_t result;
  __asm__ ("sadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadal_s16 (int32x2_t a, int16x4_t b)
{
  int32x2_t result;
  __asm__ ("sadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpadal_s32 (int64x1_t a, int32x2_t b)
{
  int64x1_t result;
  __asm__ ("sadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadal_u8 (uint16x4_t a, uint8x8_t b)
{
  uint16x4_t result;
  __asm__ ("uadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadal_u16 (uint32x2_t a, uint16x4_t b)
{
  uint32x2_t result;
  __asm__ ("uadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpadal_u32 (uint64x1_t a, uint32x2_t b)
{
  uint64x1_t result;
  __asm__ ("uadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpadalq_s8 (int16x8_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("sadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpadalq_s16 (int32x4_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("sadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpadalq_s32 (int64x2_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("sadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpadalq_u8 (uint16x8_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("uadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpadalq_u16 (uint32x4_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("uadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpadalq_u32 (uint64x2_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("uadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

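/* vpadd_f32 adds adjacent lane pairs across its two operands:
   result = { a[0] + a[1], b[0] + b[1] }.  */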
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpadd_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("faddp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

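/* vpaddl/vpaddlq: pairwise add adjacent elements and widen, with no
   accumulator (SADDLP/UADDLP); e.g. vpaddl_u8 turns eight uint8_t lanes
   into four uint16_t sums.  */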
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpaddl_s8 (int8x8_t a)
{
  int16x4_t result;
  __asm__ ("saddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpaddl_s16 (int16x4_t a)
{
  int32x2_t result;
  __asm__ ("saddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpaddl_s32 (int32x2_t a)
{
  int64x1_t result;
  __asm__ ("saddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpaddl_u8 (uint8x8_t a)
{
  uint16x4_t result;
  __asm__ ("uaddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpaddl_u16 (uint16x4_t a)
{
  uint32x2_t result;
  __asm__ ("uaddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpaddl_u32 (uint32x2_t a)
{
  uint64x1_t result;
  __asm__ ("uaddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddlq_s8 (int8x16_t a)
{
  int16x8_t result;
  __asm__ ("saddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddlq_s16 (int16x8_t a)
{
  int32x4_t result;
  __asm__ ("saddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddlq_s32 (int32x4_t a)
{
  int64x2_t result;
  __asm__ ("saddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddlq_u8 (uint8x16_t a)
{
  uint16x8_t result;
  __asm__ ("uaddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddlq_u16 (uint16x8_t a)
{
  uint32x4_t result;
  __asm__ ("uaddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddlq_u32 (uint32x4_t a)
{
  uint64x2_t result;
  __asm__ ("uaddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

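/* vpaddq_*: full-width pairwise add.  Lanes are paired first within A,
   then within B, so the low half of the result comes from A and the high
   half from B.  Sketch (helper name illustrative):

     int32x4_t pairwise_sums (int32x4_t a, int32x4_t b)
     {
       return vpaddq_s32 (a, b);   // { a0+a1, a2+a3, b0+b1, b2+b3 }
     }
*/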
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpaddq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("faddp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpaddq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("faddp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpaddq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddq_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpaddq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddq_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

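/* vpadds_f32 is the scalar reduction form: it returns a[0] + a[1] as a
   float32_t (FADDP on a two-lane source).  */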
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpadds_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("faddp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

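/* vqdmulh_n_*: saturating doubling multiply returning the high half,
   roughly (2 * a[i] * b) >> element-bits with saturation; the usual
   Q15/Q31 fixed-point multiply.  As with vmulq_n_s16 above, the 16-bit
   forms need the "x" constraint because .h by-element multiplies can only
   reach v0-v15.  */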
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

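/* vqmovn_high/vqmovun_high: saturating narrow of B written into the upper
   half of the result, preserving A as the lower half.  The vcombine with
   a zeroed vcreate builds the full-width initial value that SQXTN2/UQXTN2/
   SQXTUN2 then partially overwrite through the "+w" operand.  Sketch
   (helper name illustrative):

     int8x16_t narrow_append (int8x8_t lo, int16x8_t wide)
     {
       return vqmovn_high_s16 (lo, wide);   // low = lo, high = sat(wide)
     }
*/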
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

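/* vqrdmulh_n_*: as vqdmulh_n_* above, but rounding before the high-half
   truncation (SQRDMULH).  */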
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

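/* The *_high_n narrowing shifts below are macros rather than inline
   functions because the shift count must reach the assembler as an
   immediate (the "i" constraint); a runtime function argument could not
   guarantee that.  Usage sketch (variable names illustrative):

     int16x8_t r = vqrshrn_high_n_s32 (lo4, wide4, 8);
     // low half = lo4, high half = saturating rounded narrow of wide4 >> 8
*/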
#define vqrshrn_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

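/* vqrshrun_high_n_*: saturating rounding shift right, signed input to
   unsigned narrowed output (SQRSHRUN2); negative lanes saturate to 0.  */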
#define vqrshrun_high_n_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s64(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

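/* vqshrn_high_n_*: as vqrshrn_high_n_* above, but truncating instead of
   rounding (SQSHRN2/UQSHRN2).  */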
#define vqshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

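/* vqshrun_high_n_*: truncating signed-to-unsigned saturating narrowing
   shift (SQSHRUN2).  */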
#define vqshrun_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

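/* vrshrn_high_n_*: rounding narrowing shift into the upper half, without
   saturation (RSHRN2).  */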
#define vrshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                            (a_, vcreate_u8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                            (a_, vcreate_u16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                            (a_, vcreate_u32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

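/* vrshrn_n_*: the plain low-half rounding narrowing shift.  Sketch
   (variable names illustrative):

     uint8x8_t bytes = vrshrn_n_u16 (wide8, 8);   // round, then narrow
*/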
#define vrshrn_n_s16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

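/* vrsqrte*: reciprocal square-root estimate (FRSQRTE/URSQRTE).  The raw
   estimate is accurate to only a few bits; it is normally refined with the
   vrsqrts* step functions further below.  A sketch of one Newton-Raphson
   iteration (helper name illustrative):

     float32x2_t rsqrt_refined (float32x2_t x)
     {
       float32x2_t e = vrsqrte_f32 (x);                // initial estimate
       return vmul_f32 (e, vrsqrts_f32 (vmul_f32 (x, e), e));
     }
*/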
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrsqrte_f32 (float32x2_t a)
{
  float32x2_t result;
  __asm__ ("frsqrte %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrsqrte_f64 (float64x1_t a)
{
  float64x1_t result;
  __asm__ ("frsqrte %d0,%d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsqrte_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("ursqrte %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrsqrted_f64 (float64_t a)
{
  float64_t result;
  __asm__ ("frsqrte %d0,%d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrsqrteq_f32 (float32x4_t a)
{
  float32x4_t result;
  __asm__ ("frsqrte %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrsqrteq_f64 (float64x2_t a)
{
  float64x2_t result;
  __asm__ ("frsqrte %0.2d,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsqrteq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("ursqrte %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrsqrtes_f32 (float32_t a)
{
  float32_t result;
  __asm__ ("frsqrte %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

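/* vrsqrts*: the Newton-Raphson step for the estimates above; FRSQRTS
   computes (3.0 - a * b) / 2.0 per lane.  */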
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrsqrts_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrsqrtsd_f64 (float64_t a, float64_t b)
{
  float64_t result;
  __asm__ ("frsqrts %d0,%d1,%d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrsqrtss_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("frsqrts %s0,%s1,%s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

9825 #define vshrn_high_n_s16(a, b, c)                                       \
9826   __extension__                                                         \
9827     ({                                                                  \
9828        int16x8_t b_ = (b);                                              \
9829        int8x8_t a_ = (a);                                               \
9830        int8x16_t result = vcombine_s8                                   \
9831                             (a_, vcreate_s8                             \
9832                                    (__AARCH64_UINT64_C (0x0)));         \
9833        __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
9834                 : "+w"(result)                                          \
9835                 : "w"(b_), "i"(c)                                       \
9836                 : /* No clobbers */);                                   \
9837        result;                                                          \
9838      })
9839 
9840 #define vshrn_high_n_s32(a, b, c)                                       \
9841   __extension__                                                         \
9842     ({                                                                  \
9843        int32x4_t b_ = (b);                                              \
9844        int16x4_t a_ = (a);                                              \
9845        int16x8_t result = vcombine_s16                                  \
9846                             (a_, vcreate_s16                            \
9847                                    (__AARCH64_UINT64_C (0x0)));         \
9848        __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
9849                 : "+w"(result)                                          \
9850                 : "w"(b_), "i"(c)                                       \
9851                 : /* No clobbers */);                                   \
9852        result;                                                          \
9853      })
9854 
9855 #define vshrn_high_n_s64(a, b, c)                                       \
9856   __extension__                                                         \
9857     ({                                                                  \
9858        int64x2_t b_ = (b);                                              \
9859        int32x2_t a_ = (a);                                              \
9860        int32x4_t result = vcombine_s32                                  \
9861                             (a_, vcreate_s32                            \
9862                                    (__AARCH64_UINT64_C (0x0)));         \
9863        __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
9864                 : "+w"(result)                                          \
9865                 : "w"(b_), "i"(c)                                       \
9866                 : /* No clobbers */);                                   \
9867        result;                                                          \
9868      })
9869 
9870 #define vshrn_high_n_u16(a, b, c)                                       \
9871   __extension__                                                         \
9872     ({                                                                  \
9873        uint16x8_t b_ = (b);                                             \
9874        uint8x8_t a_ = (a);                                              \
9875        uint8x16_t result = vcombine_u8                                  \
9876                             (a_, vcreate_u8                             \
9877                                    (__AARCH64_UINT64_C (0x0)));         \
9878        __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
9879                 : "+w"(result)                                          \
9880                 : "w"(b_), "i"(c)                                       \
9881                 : /* No clobbers */);                                   \
9882        result;                                                          \
9883      })
9884 
9885 #define vshrn_high_n_u32(a, b, c)                                       \
9886   __extension__                                                         \
9887     ({                                                                  \
9888        uint32x4_t b_ = (b);                                             \
9889        uint16x4_t a_ = (a);                                             \
9890        uint16x8_t result = vcombine_u16                                 \
9891                             (a_, vcreate_u16                            \
9892                                    (__AARCH64_UINT64_C (0x0)));         \
9893        __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
9894                 : "+w"(result)                                          \
9895                 : "w"(b_), "i"(c)                                       \
9896                 : /* No clobbers */);                                   \
9897        result;                                                          \
9898      })
9899 
9900 #define vshrn_high_n_u64(a, b, c)                                       \
9901   __extension__                                                         \
9902     ({                                                                  \
9903        uint64x2_t b_ = (b);                                             \
9904        uint32x2_t a_ = (a);                                             \
9905        uint32x4_t result = vcombine_u32                                 \
9906                             (a_, vcreate_u32                            \
9907                                    (__AARCH64_UINT64_C (0x0)));         \
9908        __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
9909                 : "+w"(result)                                          \
9910                 : "w"(b_), "i"(c)                                       \
9911                 : /* No clobbers */);                                   \
9912        result;                                                          \
9913      })
9914 
9915 #define vshrn_n_s16(a, b)                                               \
9916   __extension__                                                         \
9917     ({                                                                  \
9918        int16x8_t a_ = (a);                                              \
9919        int8x8_t result;                                                 \
9920        __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
9921                 : "=w"(result)                                          \
9922                 : "w"(a_), "i"(b)                                       \
9923                 : /* No clobbers */);                                   \
9924        result;                                                          \
9925      })
9926 
9927 #define vshrn_n_s32(a, b)                                               \
9928   __extension__                                                         \
9929     ({                                                                  \
9930        int32x4_t a_ = (a);                                              \
9931        int16x4_t result;                                                \
9932        __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
9933                 : "=w"(result)                                          \
9934                 : "w"(a_), "i"(b)                                       \
9935                 : /* No clobbers */);                                   \
9936        result;                                                          \
9937      })
9938 
9939 #define vshrn_n_s64(a, b)                                               \
9940   __extension__                                                         \
9941     ({                                                                  \
9942        int64x2_t a_ = (a);                                              \
9943        int32x2_t result;                                                \
9944        __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
9945                 : "=w"(result)                                          \
9946                 : "w"(a_), "i"(b)                                       \
9947                 : /* No clobbers */);                                   \
9948        result;                                                          \
9949      })
9950 
9951 #define vshrn_n_u16(a, b)                                               \
9952   __extension__                                                         \
9953     ({                                                                  \
9954        uint16x8_t a_ = (a);                                             \
9955        uint8x8_t result;                                                \
9956        __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
9957                 : "=w"(result)                                          \
9958                 : "w"(a_), "i"(b)                                       \
9959                 : /* No clobbers */);                                   \
9960        result;                                                          \
9961      })
9962 
9963 #define vshrn_n_u32(a, b)                                               \
9964   __extension__                                                         \
9965     ({                                                                  \
9966        uint32x4_t a_ = (a);                                             \
9967        uint16x4_t result;                                               \
9968        __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
9969                 : "=w"(result)                                          \
9970                 : "w"(a_), "i"(b)                                       \
9971                 : /* No clobbers */);                                   \
9972        result;                                                          \
9973      })
9974 
9975 #define vshrn_n_u64(a, b)                                               \
9976   __extension__                                                         \
9977     ({                                                                  \
9978        uint64x2_t a_ = (a);                                             \
9979        uint32x2_t result;                                               \
9980        __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
9981                 : "=w"(result)                                          \
9982                 : "w"(a_), "i"(b)                                       \
9983                 : /* No clobbers */);                                   \
9984        result;                                                          \
9985      })
9986 
9987 #define vsli_n_p8(a, b, c)                                              \
9988   __extension__                                                         \
9989     ({                                                                  \
9990        poly8x8_t b_ = (b);                                              \
9991        poly8x8_t a_ = (a);                                              \
9992        poly8x8_t result;                                                \
9993        __asm__ ("sli %0.8b,%2.8b,%3"                                    \
9994                 : "=w"(result)                                          \
9995                 : "0"(a_), "w"(b_), "i"(c)                              \
9996                 : /* No clobbers */);                                   \
9997        result;                                                          \
9998      })
9999 
10000 #define vsli_n_p16(a, b, c)                                             \
10001   __extension__                                                         \
10002     ({                                                                  \
10003        poly16x4_t b_ = (b);                                             \
10004        poly16x4_t a_ = (a);                                             \
10005        poly16x4_t result;                                               \
10006        __asm__ ("sli %0.4h,%2.4h,%3"                                    \
10007                 : "=w"(result)                                          \
10008                 : "0"(a_), "w"(b_), "i"(c)                              \
10009                 : /* No clobbers */);                                   \
10010        result;                                                          \
10011      })
10012 
10013 #define vsliq_n_p8(a, b, c)                                             \
10014   __extension__                                                         \
10015     ({                                                                  \
10016        poly8x16_t b_ = (b);                                             \
10017        poly8x16_t a_ = (a);                                             \
10018        poly8x16_t result;                                               \
10019        __asm__ ("sli %0.16b,%2.16b,%3"                                  \
10020                 : "=w"(result)                                          \
10021                 : "0"(a_), "w"(b_), "i"(c)                              \
10022                 : /* No clobbers */);                                   \
10023        result;                                                          \
10024      })
10025 
10026 #define vsliq_n_p16(a, b, c)                                            \
10027   __extension__                                                         \
10028     ({                                                                  \
10029        poly16x8_t b_ = (b);                                             \
10030        poly16x8_t a_ = (a);                                             \
10031        poly16x8_t result;                                               \
10032        __asm__ ("sli %0.8h,%2.8h,%3"                                    \
10033                 : "=w"(result)                                          \
10034                 : "0"(a_), "w"(b_), "i"(c)                              \
10035                 : /* No clobbers */);                                   \
10036        result;                                                          \
10037      })
10038 
10039 #define vsri_n_p8(a, b, c)                                              \
10040   __extension__                                                         \
10041     ({                                                                  \
10042        poly8x8_t b_ = (b);                                              \
10043        poly8x8_t a_ = (a);                                              \
10044        poly8x8_t result;                                                \
10045        __asm__ ("sri %0.8b,%2.8b,%3"                                    \
10046                 : "=w"(result)                                          \
10047                 : "0"(a_), "w"(b_), "i"(c)                              \
10048                 : /* No clobbers */);                                   \
10049        result;                                                          \
10050      })
10051 
10052 #define vsri_n_p16(a, b, c)                                             \
10053   __extension__                                                         \
10054     ({                                                                  \
10055        poly16x4_t b_ = (b);                                             \
10056        poly16x4_t a_ = (a);                                             \
10057        poly16x4_t result;                                               \
10058        __asm__ ("sri %0.4h,%2.4h,%3"                                    \
10059                 : "=w"(result)                                          \
10060                 : "0"(a_), "w"(b_), "i"(c)                              \
10061                 : /* No clobbers */);                                   \
10062        result;                                                          \
10063      })
10064 
10065 #define vsriq_n_p8(a, b, c)                                             \
10066   __extension__                                                         \
10067     ({                                                                  \
10068        poly8x16_t b_ = (b);                                             \
10069        poly8x16_t a_ = (a);                                             \
10070        poly8x16_t result;                                               \
10071        __asm__ ("sri %0.16b,%2.16b,%3"                                  \
10072                 : "=w"(result)                                          \
10073                 : "0"(a_), "w"(b_), "i"(c)                              \
10074                 : /* No clobbers */);                                   \
10075        result;                                                          \
10076      })
10077 
10078 #define vsriq_n_p16(a, b, c)                                            \
10079   __extension__                                                         \
10080     ({                                                                  \
10081        poly16x8_t b_ = (b);                                             \
10082        poly16x8_t a_ = (a);                                             \
10083        poly16x8_t result;                                               \
10084        __asm__ ("sri %0.8h,%2.8h,%3"                                    \
10085                 : "=w"(result)                                          \
10086                 : "0"(a_), "w"(b_), "i"(c)                              \
10087                 : /* No clobbers */);                                   \
10088        result;                                                          \
10089      })
10090 
10091 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_p8(poly8x8_t a,poly8x8_t b)10092 vtst_p8 (poly8x8_t a, poly8x8_t b)
10093 {
10094   uint8x8_t result;
10095   __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
10096            : "=w"(result)
10097            : "w"(a), "w"(b)
10098            : /* No clobbers */);
10099   return result;
10100 }
10101 
10102 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_p16(poly16x4_t a,poly16x4_t b)10103 vtst_p16 (poly16x4_t a, poly16x4_t b)
10104 {
10105   uint16x4_t result;
10106   __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
10107            : "=w"(result)
10108            : "w"(a), "w"(b)
10109            : /* No clobbers */);
10110   return result;
10111 }
10112 
10113 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_p8(poly8x16_t a,poly8x16_t b)10114 vtstq_p8 (poly8x16_t a, poly8x16_t b)
10115 {
10116   uint8x16_t result;
10117   __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
10118            : "=w"(result)
10119            : "w"(a), "w"(b)
10120            : /* No clobbers */);
10121   return result;
10122 }
10123 
10124 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_p16(poly16x8_t a,poly16x8_t b)10125 vtstq_p16 (poly16x8_t a, poly16x8_t b)
10126 {
10127   uint16x8_t result;
10128   __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
10129            : "=w"(result)
10130            : "w"(a), "w"(b)
10131            : /* No clobbers */);
10132   return result;
10133 }
10134 
10135 /* End of temporary inline asm implementations.  */
10136 
10137 /* Start of temporary inline asm for vldn, vstn and friends.  */
10138 
10139 /* Create struct element types for duplicating loads.
10140 
10141    Create 2 element structures of:
10142 
10143    +------+----+----+----+----+
10144    |      | 8  | 16 | 32 | 64 |
10145    +------+----+----+----+----+
10146    |int   | Y  | Y  | N  | N  |
10147    +------+----+----+----+----+
10148    |uint  | Y  | Y  | N  | N  |
10149    +------+----+----+----+----+
10150    |float | -  | Y  | N  | N  |
10151    +------+----+----+----+----+
10152    |poly  | Y  | Y  | -  | -  |
10153    +------+----+----+----+----+
10154 
10155    Create 3 element structures of:
10156 
10157    +------+----+----+----+----+
10158    |      | 8  | 16 | 32 | 64 |
10159    +------+----+----+----+----+
10160    |int   | Y  | Y  | Y  | Y  |
10161    +------+----+----+----+----+
10162    |uint  | Y  | Y  | Y  | Y  |
10163    +------+----+----+----+----+
10164    |float | -  | Y  | Y  | Y  |
10165    +------+----+----+----+----+
10166    |poly  | Y  | Y  | -  | -  |
10167    +------+----+----+----+----+
10168 
10169    Create 4 element structures of:
10170 
10171    +------+----+----+----+----+
10172    |      | 8  | 16 | 32 | 64 |
10173    +------+----+----+----+----+
10174    |int   | Y  | N  | N  | Y  |
10175    +------+----+----+----+----+
10176    |uint  | Y  | N  | N  | Y  |
10177    +------+----+----+----+----+
10178    |float | -  | N  | N  | Y  |
10179    +------+----+----+----+----+
10180    |poly  | Y  | N  | -  | -  |
10181    +------+----+----+----+----+
10182 
  These are required for casting memory references.  */
#define __STRUCTN(t, sz, nelem)			\
  typedef struct t ## sz ## x ## nelem ## _t {	\
    t ## sz ## _t val[nelem];			\
  }  t ## sz ## x ## nelem ## _t;
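
/* For illustration only: one expansion of the macro above.
   __STRUCTN (int, 8, 2) pastes together

     typedef struct int8x2_t {
       int8_t val[2];
     } int8x2_t;

   i.e. a struct of scalar elements, as opposed to the vector-struct
   types such as int8x8x2_t.  */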

/* 2-element structs.  */
__STRUCTN (int, 8, 2)
__STRUCTN (int, 16, 2)
__STRUCTN (uint, 8, 2)
__STRUCTN (uint, 16, 2)
__STRUCTN (float, 16, 2)
__STRUCTN (poly, 8, 2)
__STRUCTN (poly, 16, 2)
/* 3-element structs.  */
__STRUCTN (int, 8, 3)
__STRUCTN (int, 16, 3)
__STRUCTN (int, 32, 3)
__STRUCTN (int, 64, 3)
__STRUCTN (uint, 8, 3)
__STRUCTN (uint, 16, 3)
__STRUCTN (uint, 32, 3)
__STRUCTN (uint, 64, 3)
__STRUCTN (float, 16, 3)
__STRUCTN (float, 32, 3)
__STRUCTN (float, 64, 3)
__STRUCTN (poly, 8, 3)
__STRUCTN (poly, 16, 3)
/* 4-element structs.  */
__STRUCTN (int, 8, 4)
__STRUCTN (int, 64, 4)
__STRUCTN (uint, 8, 4)
__STRUCTN (uint, 64, 4)
__STRUCTN (poly, 8, 4)
__STRUCTN (float, 64, 4)
#undef __STRUCTN


#define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode,		     \
			qmode, ptr_mode, funcsuffix, signedtype)	     \
__extension__ static __inline void					     \
__attribute__ ((__always_inline__))					     \
vst2_lane_ ## funcsuffix (ptrtype *__ptr,				     \
			  intype __b, const int __c)			     \
{									     \
  __builtin_aarch64_simd_oi __o;					     \
  largetype __temp;							     \
  __temp.val[0]								     \
    = vcombine_##funcsuffix (__b.val[0],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[1]								     \
    = vcombine_##funcsuffix (__b.val[1],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __o = __builtin_aarch64_set_qregoi##qmode (__o,			     \
					     (signedtype) __temp.val[0], 0); \
  __o = __builtin_aarch64_set_qregoi##qmode (__o,			     \
					     (signedtype) __temp.val[1], 1); \
  __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
				     __ptr, __o, __c);			     \
}
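
/* Note: the register tuple handed to the lane-store builtin is built from
   Q registers, so each 64-bit input vector is first widened with vcombine
   (upper half zeroed) before the store is issued.  */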

__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16,
		 float16x8_t)
__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32,
		 float32x4_t)
__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64,
		 float64x2_t)
__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8,
		 int8x16_t)
__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16,
		 int16x8_t)
__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8,
		 int8x16_t)
__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16,
		 int16x8_t)
__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32,
		 int32x4_t)
__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64,
		 int64x2_t)
__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8,
		 int8x16_t)
__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, u16,
		 int16x8_t)
__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32,
		 int32x4_t)
__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64,
		 int64x2_t)

#undef __ST2_LANE_FUNC
#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)	    \
__extension__ static __inline void					    \
__attribute__ ((__always_inline__))					    \
vst2q_lane_ ## funcsuffix (ptrtype *__ptr,				    \
			   intype __b, const int __c)			    \
{									    \
  union { intype __i;							    \
	  __builtin_aarch64_simd_oi __o; } __temp = { __b };		    \
  __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
				    __ptr, __temp.__o, __c);		    \
}

__ST2_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16)
__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
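
/* A minimal usage sketch for the lane stores generated above; the buffer
   names and lane value are hypothetical, for illustration only:

     uint8x16x2_t pair = vld2q_u8 (src);   // src: 32 interleaved bytes
     uint8_t out[2];
     vst2q_lane_u8 (out, pair, 3);         // stores lane 3 of both vectors

   Each vst2q_lane_* call stores exactly two elements, re-interleaved.  */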

#define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode,		     \
			qmode, ptr_mode, funcsuffix, signedtype)	     \
__extension__ static __inline void					     \
__attribute__ ((__always_inline__))					     \
vst3_lane_ ## funcsuffix (ptrtype *__ptr,				     \
			  intype __b, const int __c)			     \
{									     \
  __builtin_aarch64_simd_ci __o;					     \
  largetype __temp;							     \
  __temp.val[0]								     \
    = vcombine_##funcsuffix (__b.val[0],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[1]								     \
    = vcombine_##funcsuffix (__b.val[1],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[2]								     \
    = vcombine_##funcsuffix (__b.val[2],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __o = __builtin_aarch64_set_qregci##qmode (__o,			     \
					     (signedtype) __temp.val[0], 0); \
  __o = __builtin_aarch64_set_qregci##qmode (__o,			     \
					     (signedtype) __temp.val[1], 1); \
  __o = __builtin_aarch64_set_qregci##qmode (__o,			     \
					     (signedtype) __temp.val[2], 2); \
  __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
				     __ptr, __o, __c);			     \
}

__ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16,
		 float16x8_t)
__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32,
		 float32x4_t)
__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64,
		 float64x2_t)
__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8,
		 int8x16_t)
__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16,
		 int16x8_t)
__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
		 int8x16_t)
__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
		 int16x8_t)
__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32,
		 int32x4_t)
__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64,
		 int64x2_t)
__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8,
		 int8x16_t)
__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16,
		 int16x8_t)
__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32,
		 int32x4_t)
__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64,
		 int64x2_t)

#undef __ST3_LANE_FUNC
#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)	    \
__extension__ static __inline void					    \
__attribute__ ((__always_inline__))					    \
vst3q_lane_ ## funcsuffix (ptrtype *__ptr,				    \
			   intype __b, const int __c)			    \
{									    \
  union { intype __i;							    \
	  __builtin_aarch64_simd_ci __o; } __temp = { __b };		    \
  __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
				    __ptr, __temp.__o, __c);		    \
}

__ST3_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16)
__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)

#define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode,		     \
			qmode, ptr_mode, funcsuffix, signedtype)	     \
__extension__ static __inline void					     \
__attribute__ ((__always_inline__))					     \
vst4_lane_ ## funcsuffix (ptrtype *__ptr,				     \
			  intype __b, const int __c)			     \
{									     \
  __builtin_aarch64_simd_xi __o;					     \
  largetype __temp;							     \
  __temp.val[0]								     \
    = vcombine_##funcsuffix (__b.val[0],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[1]								     \
    = vcombine_##funcsuffix (__b.val[1],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[2]								     \
    = vcombine_##funcsuffix (__b.val[2],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[3]								     \
    = vcombine_##funcsuffix (__b.val[3],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __o = __builtin_aarch64_set_qregxi##qmode (__o,			     \
					     (signedtype) __temp.val[0], 0); \
  __o = __builtin_aarch64_set_qregxi##qmode (__o,			     \
					     (signedtype) __temp.val[1], 1); \
  __o = __builtin_aarch64_set_qregxi##qmode (__o,			     \
					     (signedtype) __temp.val[2], 2); \
  __o = __builtin_aarch64_set_qregxi##qmode (__o,			     \
					     (signedtype) __temp.val[3], 3); \
  __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
				     __ptr, __o, __c);			     \
}

__ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16,
		 float16x8_t)
__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32,
		 float32x4_t)
__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64,
		 float64x2_t)
__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8,
		 int8x16_t)
__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16,
		 int16x8_t)
__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8,
		 int8x16_t)
__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16,
		 int16x8_t)
__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32,
		 int32x4_t)
__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64,
		 int64x2_t)
__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8,
		 int8x16_t)
__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16,
		 int16x8_t)
__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32,
		 int32x4_t)
__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64,
		 int64x2_t)

#undef __ST4_LANE_FUNC
#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)	    \
__extension__ static __inline void					    \
__attribute__ ((__always_inline__))					    \
vst4q_lane_ ## funcsuffix (ptrtype *__ptr,				    \
			   intype __b, const int __c)			    \
{									    \
  union { intype __i;							    \
	  __builtin_aarch64_simd_xi __o; } __temp = { __b };		    \
  __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
				    __ptr, __temp.__o, __c);		    \
}

__ST4_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16)
__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)

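/* There is no across-lanes SADDLV/UADDLV arrangement for a two-lane .2s
   vector; the pairwise long add (SADDLP/UADDLP) of the single lane pair
   below computes the same widened sum.  */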
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddlv_s32 (int32x2_t a)
{
  int64_t result;
  __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddlv_u32 (uint32x2_t a)
{
  uint64_t result;
  __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
}

/* Table intrinsics.  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
{
  poly8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl1_s8 (int8x16_t a, uint8x8_t b)
{
  int8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
{
  poly8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
{
  int8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}
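
/* Note on the lookups above: TBL writes zero to any result byte whose
   index is out of range, whereas TBX leaves the corresponding byte of the
   destination unchanged -- hence `result' is initialised from `r' and
   tied to operand 0 with "+w" in the vqtbx forms.  */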

/* V7 legacy table intrinsics.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl1_s8 (int8x8_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = __builtin_aarch64_tbl3v8qi (__o, idx);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = __builtin_aarch64_tbl3v8qi (__o, idx);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
  return result;
}
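
/* vtbl3 and vtbl4 above pack the 64-bit table halves into two full Q
   registers and reuse the two-table TBL builtin; for vtbl3 the unused
   upper eight bytes are zeroed, so out-of-range indices still read as
   zero, matching the ARMv7 VTBL behaviour.  */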

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* End of temporary inline asm.  */

/* Start of optimal implementations in approved order.  */

/* vabs  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vabs_f32 (float32x2_t __a)
{
  return __builtin_aarch64_absv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vabs_f64 (float64x1_t __a)
{
  return (float64x1_t) {__builtin_fabs (__a[0])};
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vabs_s8 (int8x8_t __a)
{
  return __builtin_aarch64_absv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vabs_s16 (int16x4_t __a)
{
  return __builtin_aarch64_absv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vabs_s32 (int32x2_t __a)
{
  return __builtin_aarch64_absv2si (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vabs_s64 (int64x1_t __a)
{
  return (int64x1_t) {__builtin_aarch64_absdi (__a[0])};
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vabsq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_absv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vabsq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_absv2df (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabsq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_absv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabsq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_absv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabsq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_absv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabsq_s64 (int64x2_t __a)
{
  return __builtin_aarch64_absv2di (__a);
}

/* vadd */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddd_s64 (int64_t __a, int64_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddd_u64 (uint64_t __a, uint64_t __b)
{
  return __a + __b;
}

/* vaddv */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddv_s8 (int8x8_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v8qi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddv_s16 (int16x4_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v4hi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddv_s32 (int32x2_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v2si (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vaddv_u8 (uint8x8_t __a)
{
  return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddv_u16 (uint16x4_t __a)
{
  return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a);
}

10995 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddv_u32(uint32x2_t __a)10996 vaddv_u32 (uint32x2_t __a)
10997 {
  return (uint32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddvq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v16qi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddvq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v8hi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddvq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v4si (__a);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddvq_s64 (int64x2_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v2di (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vaddvq_u8 (uint8x16_t __a)
{
  return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddvq_u16 (uint16x8_t __a)
{
  return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddvq_u32 (uint32x4_t __a)
{
  return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddvq_u64 (uint64x2_t __a)
{
  return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vaddv_f32 (float32x2_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v2sf (__a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vaddvq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v4sf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vaddvq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v2df (__a);
}

/* vbsl  */
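
/* Bitwise select: each result bit is taken from __b where the mask
   __a has that bit set, and from __c where it is clear, i.e.
   (__a & __b) | (~__a & __c) bit by bit.  A typical use is blending
   two vectors under a comparison mask; as a sketch, with __x and __y
   as illustrative operands:

     uint32x2_t __m = vcgt_f32 (__x, __y);
     float32x2_t __hi = vbsl_f32 (__m, __x, __y);

   which selects the lane-wise larger value (for ordered inputs).  */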

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
{
  return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c)
{
  return (float64x1_t)
    { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) };
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
{
  return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
{
  return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
{
  return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
{
  return (int64x1_t)
      {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
{
  return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
{
  return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
{
  return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
{
  return (uint64x1_t)
      {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])};
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
{
  return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
{
  return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
{
  return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
{
  return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
{
  return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
{
  return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
}

/* ARMv8.1 intrinsics.  */
#pragma GCC push_options
#pragma GCC target ("arch=armv8.1-a")
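
/* vqrdmlah/vqrdmlsh correspond to the ARMv8.1 SQRDMLAH/SQRDMLSH
   instructions: a saturating rounding doubling multiply returning the
   high half of 2 * __b * __c, accumulated into (vqrdmlah) or
   subtracted from (vqrdmlsh) __a, with saturation on the final
   result.  The _lane/_laneq forms take the multiplier from lane __d
   of __c.  The pragma above enables these for this block only.  */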

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmlshq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c)
{
  return (int16_t) __builtin_aarch64_sqrdmlahhi (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c)
{
  return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c)
{
  return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmlshs_s32 (int32_t __a, int32_t __b, int32_t __c)
{
  return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d);
}
#pragma GCC pop_options

#pragma GCC push_options
#pragma GCC target ("+nothing+crypto")
/* vaes  */
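
/* vaeseq_u8/vaesdq_u8 perform one AES round step (AddRoundKey, then
   SubBytes and ShiftRows, or their inverses); vaesmcq_u8/vaesimcq_u8
   apply (Inverse) MixColumns.  As a sketch, with __state and
   __roundkey as illustrative locals, a middle round of AES
   encryption on a 128-bit state is

     __state = vaesmcq_u8 (vaeseq_u8 (__state, __roundkey));

   while the final round skips MixColumns and XORs in the last round
   key (e.g. with veorq_u8).  */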

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaeseq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaesdq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaesmcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaesimcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
}
#pragma GCC pop_options

/* vcage  */
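
/* The absolute-compare families below (vcage, vcagt, vcale, vcalt)
   compare |__a| against |__b| (the FACGE/FACGT forms); a true lane
   comes back as all ones, a false lane as zero, and the scalar
   variants return -1 or 0 accordingly.  */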

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcage_f64 (float64x1_t __a, float64x1_t __b)
{
  return vabs_f64 (__a) >= vabs_f64 (__b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcages_f32 (float32_t __a, float32_t __b)
{
  return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcage_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) >= vabs_f32 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcageq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) >= vabsq_f32 (__b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcaged_f64 (float64_t __a, float64_t __b)
{
  return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcageq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) >= vabsq_f64 (__b);
}

/* vcagt  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcagts_f32 (float32_t __a, float32_t __b)
{
  return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcagt_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) > vabs_f32 (__b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcagt_f64 (float64x1_t __a, float64x1_t __b)
{
  return vabs_f64 (__a) > vabs_f64 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcagtq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) > vabsq_f32 (__b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcagtd_f64 (float64_t __a, float64_t __b)
{
  return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcagtq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) > vabsq_f64 (__b);
}

/* vcale  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcale_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) <= vabs_f32 (__b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcale_f64 (float64x1_t __a, float64x1_t __b)
{
  return vabs_f64 (__a) <= vabs_f64 (__b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcaled_f64 (float64_t __a, float64_t __b)
{
  return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcales_f32 (float32_t __a, float32_t __b)
{
  return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? -1 : 0;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcaleq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) <= vabsq_f32 (__b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcaleq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) <= vabsq_f64 (__b);
}

/* vcalt  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcalt_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) < vabs_f32 (__b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcalt_f64 (float64x1_t __a, float64x1_t __b)
{
  return vabs_f64 (__a) < vabs_f64 (__b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcaltd_f64 (float64_t __a, float64_t __b)
{
  return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcaltq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) < vabsq_f32 (__b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcaltq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) < vabsq_f64 (__b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcalts_f32 (float32_t __a, float32_t __b)
{
  return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0;
}

/* vceq - vector.  */
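
/* The lane-wise compares below are written with GCC's vector
   extensions, whose comparison operators already produce -1 (all bits
   set) in each true lane and 0 otherwise; the casts merely adjust the
   signedness of the result type.  E.g. comparing {1, 2} and {1, 3} as
   int32x2_t yields {0xffffffff, 0}.  */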

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) (__a == __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_f64 (float64x1_t __a, float64x1_t __b)
{
  return (uint64x1_t) (__a == __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) (__a == __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) (__a == __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceq_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) (__a == __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) (__a == __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) (__a == __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (__a == __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceq_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (__a == __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (__a == __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (__a == __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) (__a == __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) (__a == __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) (__a == __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) (__a == __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) (__a == __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) (__a == __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) (__a == __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (__a == __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (__a == __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (__a == __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (__a == __b);
}

/* vceq - scalar.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vceqs_f32 (float32_t __a, float32_t __b)
{
  return __a == __b ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqd_s64 (int64_t __a, int64_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqd_u64 (uint64_t __a, uint64_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqd_f64 (float64_t __a, float64_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

/* vceqz - vector.  */
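
/* The vceqz* forms test each lane against zero (the compare-with-zero
   encodings); the 64-bit cases spell the constant with the
   __AARCH64_*64_C macros so the literal has the full width.  */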

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceqz_f32 (float32x2_t __a)
{
  return (uint32x2_t) (__a == 0.0f);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqz_f64 (float64x1_t __a)
{
  return (uint64x1_t) (__a == (float64x1_t) {0.0});
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceqz_p8 (poly8x8_t __a)
{
  return (uint8x8_t) (__a == 0);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceqz_s8 (int8x8_t __a)
{
  return (uint8x8_t) (__a == 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceqz_s16 (int16x4_t __a)
{
  return (uint16x4_t) (__a == 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceqz_s32 (int32x2_t __a)
{
  return (uint32x2_t) (__a == 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqz_s64 (int64x1_t __a)
{
  return (uint64x1_t) (__a == __AARCH64_INT64_C (0));
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceqz_u8 (uint8x8_t __a)
{
  return (__a == 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceqz_u16 (uint16x4_t __a)
{
  return (__a == 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceqz_u32 (uint32x2_t __a)
{
  return (__a == 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqz_u64 (uint64x1_t __a)
{
  return (__a == __AARCH64_UINT64_C (0));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqzq_f32 (float32x4_t __a)
{
  return (uint32x4_t) (__a == 0.0f);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqzq_f64 (float64x2_t __a)
{
  return (uint64x2_t) (__a == 0.0);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqzq_p8 (poly8x16_t __a)
{
  return (uint8x16_t) (__a == 0);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqzq_s8 (int8x16_t __a)
{
  return (uint8x16_t) (__a == 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqzq_s16 (int16x8_t __a)
{
  return (uint16x8_t) (__a == 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqzq_s32 (int32x4_t __a)
{
  return (uint32x4_t) (__a == 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqzq_s64 (int64x2_t __a)
{
  return (uint64x2_t) (__a == __AARCH64_INT64_C (0));
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqzq_u8 (uint8x16_t __a)
{
  return (__a == 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqzq_u16 (uint16x8_t __a)
{
  return (__a == 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqzq_u32 (uint32x4_t __a)
{
  return (__a == 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqzq_u64 (uint64x2_t __a)
{
  return (__a == __AARCH64_UINT64_C (0));
}

/* vceqz - scalar.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vceqzs_f32 (float32_t __a)
{
  return __a == 0.0f ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqzd_s64 (int64_t __a)
{
  return __a == 0 ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqzd_u64 (uint64_t __a)
{
  return __a == 0 ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqzd_f64 (float64_t __a)
{
  return __a == 0.0 ? -1ll : 0ll;
}

/* vcge - vector.  */
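
/* Ordered compares: signed and floating-point lanes correspond to
   CMGE/CMGT (FCMGE/FCMGT), unsigned lanes to CMHS/CMHI; as with vceq,
   every true lane is returned as all ones.  */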

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) (__a >= __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_f64 (float64x1_t __a, float64x1_t __b)
{
  return (uint64x1_t) (__a >= __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) (__a >= __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcge_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) (__a >= __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) (__a >= __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) (__a >= __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (__a >= __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcge_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (__a >= __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (__a >= __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (__a >= __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) (__a >= __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) (__a >= __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) (__a >= __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgeq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) (__a >= __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) (__a >= __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) (__a >= __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (__a >= __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (__a >= __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (__a >= __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (__a >= __b);
}

/* vcge - scalar.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcges_f32 (float32_t __a, float32_t __b)
{
  return __a >= __b ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcged_s64 (int64_t __a, int64_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcged_u64 (uint64_t __a, uint64_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcged_f64 (float64_t __a, float64_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

/* vcgez - vector.  */
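
/* Compare-against-zero variants of the ordered compares; only signed
   and floating-point element types are provided, since an unsigned
   value is always >= 0.  */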

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgez_f32 (float32x2_t __a)
{
  return (uint32x2_t) (__a >= 0.0f);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgez_f64 (float64x1_t __a)
{
  return (uint64x1_t) (__a >= (float64x1_t) {0.0});
12098 }
12099 
12100 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgez_s8(int8x8_t __a)12101 vcgez_s8 (int8x8_t __a)
12102 {
12103   return (uint8x8_t) (__a >= 0);
12104 }
12105 
12106 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgez_s16(int16x4_t __a)12107 vcgez_s16 (int16x4_t __a)
12108 {
12109   return (uint16x4_t) (__a >= 0);
12110 }
12111 
12112 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgez_s32(int32x2_t __a)12113 vcgez_s32 (int32x2_t __a)
12114 {
12115   return (uint32x2_t) (__a >= 0);
12116 }
12117 
12118 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgez_s64(int64x1_t __a)12119 vcgez_s64 (int64x1_t __a)
12120 {
12121   return (uint64x1_t) (__a >= __AARCH64_INT64_C (0));
12122 }
12123 
12124 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgezq_f32(float32x4_t __a)12125 vcgezq_f32 (float32x4_t __a)
12126 {
12127   return (uint32x4_t) (__a >= 0.0f);
12128 }
12129 
12130 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgezq_f64(float64x2_t __a)12131 vcgezq_f64 (float64x2_t __a)
12132 {
12133   return (uint64x2_t) (__a >= 0.0);
12134 }
12135 
12136 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgezq_s8(int8x16_t __a)12137 vcgezq_s8 (int8x16_t __a)
12138 {
12139   return (uint8x16_t) (__a >= 0);
12140 }
12141 
12142 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgezq_s16(int16x8_t __a)12143 vcgezq_s16 (int16x8_t __a)
12144 {
12145   return (uint16x8_t) (__a >= 0);
12146 }
12147 
12148 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgezq_s32(int32x4_t __a)12149 vcgezq_s32 (int32x4_t __a)
12150 {
12151   return (uint32x4_t) (__a >= 0);
12152 }
12153 
12154 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgezq_s64(int64x2_t __a)12155 vcgezq_s64 (int64x2_t __a)
12156 {
12157   return (uint64x2_t) (__a >= __AARCH64_INT64_C (0));
12158 }
12159 
12160 /* vcgez - scalar.  */
12161 
12162 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcgezs_f32(float32_t __a)12163 vcgezs_f32 (float32_t __a)
12164 {
12165   return __a >= 0.0f ? -1 : 0;
12166 }
12167 
12168 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgezd_s64(int64_t __a)12169 vcgezd_s64 (int64_t __a)
12170 {
12171   return __a >= 0 ? -1ll : 0ll;
12172 }
12173 
12174 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgezd_f64(float64_t __a)12175 vcgezd_f64 (float64_t __a)
12176 {
12177   return __a >= 0.0 ? -1ll : 0ll;
12178 }
12179 
12180 /* vcgt - vector.  */
12181 
12182 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_f32(float32x2_t __a,float32x2_t __b)12183 vcgt_f32 (float32x2_t __a, float32x2_t __b)
12184 {
12185   return (uint32x2_t) (__a > __b);
12186 }
12187 
12188 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_f64(float64x1_t __a,float64x1_t __b)12189 vcgt_f64 (float64x1_t __a, float64x1_t __b)
12190 {
12191   return (uint64x1_t) (__a > __b);
12192 }
12193 
12194 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_s8(int8x8_t __a,int8x8_t __b)12195 vcgt_s8 (int8x8_t __a, int8x8_t __b)
12196 {
12197   return (uint8x8_t) (__a > __b);
12198 }
12199 
12200 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgt_s16(int16x4_t __a,int16x4_t __b)12201 vcgt_s16 (int16x4_t __a, int16x4_t __b)
12202 {
12203   return (uint16x4_t) (__a > __b);
12204 }
12205 
12206 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_s32(int32x2_t __a,int32x2_t __b)12207 vcgt_s32 (int32x2_t __a, int32x2_t __b)
12208 {
12209   return (uint32x2_t) (__a > __b);
12210 }
12211 
12212 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_s64(int64x1_t __a,int64x1_t __b)12213 vcgt_s64 (int64x1_t __a, int64x1_t __b)
12214 {
12215   return (uint64x1_t) (__a > __b);
12216 }
12217 
12218 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_u8(uint8x8_t __a,uint8x8_t __b)12219 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
12220 {
12221   return (__a > __b);
12222 }
12223 
12224 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgt_u16(uint16x4_t __a,uint16x4_t __b)12225 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
12226 {
12227   return (__a > __b);
12228 }
12229 
12230 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_u32(uint32x2_t __a,uint32x2_t __b)12231 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
12232 {
12233   return (__a > __b);
12234 }
12235 
12236 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_u64(uint64x1_t __a,uint64x1_t __b)12237 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
12238 {
12239   return (__a > __b);
12240 }
12241 
12242 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_f32(float32x4_t __a,float32x4_t __b)12243 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
12244 {
12245   return (uint32x4_t) (__a > __b);
12246 }
12247 
12248 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_f64(float64x2_t __a,float64x2_t __b)12249 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
12250 {
12251   return (uint64x2_t) (__a > __b);
12252 }
12253 
12254 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_s8(int8x16_t __a,int8x16_t __b)12255 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
12256 {
12257   return (uint8x16_t) (__a > __b);
12258 }
12259 
12260 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtq_s16(int16x8_t __a,int16x8_t __b)12261 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
12262 {
12263   return (uint16x8_t) (__a > __b);
12264 }
12265 
12266 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_s32(int32x4_t __a,int32x4_t __b)12267 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
12268 {
12269   return (uint32x4_t) (__a > __b);
12270 }
12271 
12272 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_s64(int64x2_t __a,int64x2_t __b)12273 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
12274 {
12275   return (uint64x2_t) (__a > __b);
12276 }
12277 
12278 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_u8(uint8x16_t __a,uint8x16_t __b)12279 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
12280 {
12281   return (__a > __b);
12282 }
12283 
12284 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtq_u16(uint16x8_t __a,uint16x8_t __b)12285 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
12286 {
12287   return (__a > __b);
12288 }
12289 
12290 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_u32(uint32x4_t __a,uint32x4_t __b)12291 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
12292 {
12293   return (__a > __b);
12294 }
12295 
12296 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_u64(uint64x2_t __a,uint64x2_t __b)12297 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
12298 {
12299   return (__a > __b);
12300 }
12301 
12302 /* vcgt - scalar.  */
12303 
12304 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcgts_f32(float32_t __a,float32_t __b)12305 vcgts_f32 (float32_t __a, float32_t __b)
12306 {
12307   return __a > __b ? -1 : 0;
12308 }
12309 
12310 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtd_s64(int64_t __a,int64_t __b)12311 vcgtd_s64 (int64_t __a, int64_t __b)
12312 {
12313   return __a > __b ? -1ll : 0ll;
12314 }
12315 
12316 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtd_u64(uint64_t __a,uint64_t __b)12317 vcgtd_u64 (uint64_t __a, uint64_t __b)
12318 {
12319   return __a > __b ? -1ll : 0ll;
12320 }
12321 
12322 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtd_f64(float64_t __a,float64_t __b)12323 vcgtd_f64 (float64_t __a, float64_t __b)
12324 {
12325   return __a > __b ? -1ll : 0ll;
12326 }
12327 
12328 /* vcgtz - vector.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgtz_f32 (float32x2_t __a)
{
  return (uint32x2_t) (__a > 0.0f);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtz_f64 (float64x1_t __a)
{
  return (uint64x1_t) (__a > (float64x1_t) {0.0});
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgtz_s8 (int8x8_t __a)
{
  return (uint8x8_t) (__a > 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgtz_s16 (int16x4_t __a)
{
  return (uint16x4_t) (__a > 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgtz_s32 (int32x2_t __a)
{
  return (uint32x2_t) (__a > 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtz_s64 (int64x1_t __a)
{
  return (uint64x1_t) (__a > __AARCH64_INT64_C (0));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtzq_f32 (float32x4_t __a)
{
  return (uint32x4_t) (__a > 0.0f);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtzq_f64 (float64x2_t __a)
{
  return (uint64x2_t) (__a > 0.0);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtzq_s8 (int8x16_t __a)
{
  return (uint8x16_t) (__a > 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtzq_s16 (int16x8_t __a)
{
  return (uint16x8_t) (__a > 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtzq_s32 (int32x4_t __a)
{
  return (uint32x4_t) (__a > 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtzq_s64 (int64x2_t __a)
{
  return (uint64x2_t) (__a > __AARCH64_INT64_C (0));
}

/* vcgtz - scalar.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcgtzs_f32 (float32_t __a)
{
  return __a > 0.0f ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtzd_s64 (int64_t __a)
{
  return __a > 0 ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtzd_f64 (float64_t __a)
{
  return __a > 0.0 ? -1ll : 0ll;
}

/* vcle - vector.  */
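/* Lane-wise "less than or equal": each true lane yields an all-ones
   mask in the corresponding unsigned element type.  */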

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) (__a <= __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_f64 (float64x1_t __a, float64x1_t __b)
{
  return (uint64x1_t) (__a <= __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) (__a <= __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) (__a <= __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) (__a <= __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) (__a <= __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (__a <= __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (__a <= __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (__a <= __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (__a <= __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) (__a <= __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) (__a <= __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) (__a <= __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) (__a <= __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) (__a <= __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) (__a <= __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (__a <= __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (__a <= __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (__a <= __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (__a <= __b);
}

/* vcle - scalar.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcles_f32 (float32_t __a, float32_t __b)
{
  return __a <= __b ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_s64 (int64_t __a, int64_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_u64 (uint64_t __a, uint64_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_f64 (float64_t __a, float64_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

/* vclez - vector.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclez_f32 (float32x2_t __a)
{
  return (uint32x2_t) (__a <= 0.0f);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclez_f64 (float64x1_t __a)
{
  return (uint64x1_t) (__a <= (float64x1_t) {0.0});
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclez_s8 (int8x8_t __a)
{
  return (uint8x8_t) (__a <= 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclez_s16 (int16x4_t __a)
{
  return (uint16x4_t) (__a <= 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclez_s32 (int32x2_t __a)
{
  return (uint32x2_t) (__a <= 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclez_s64 (int64x1_t __a)
{
  return (uint64x1_t) (__a <= __AARCH64_INT64_C (0));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclezq_f32 (float32x4_t __a)
{
  return (uint32x4_t) (__a <= 0.0f);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vclezq_f64 (float64x2_t __a)
{
  return (uint64x2_t) (__a <= 0.0);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclezq_s8 (int8x16_t __a)
{
  return (uint8x16_t) (__a <= 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclezq_s16 (int16x8_t __a)
{
  return (uint16x8_t) (__a <= 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclezq_s32 (int32x4_t __a)
{
  return (uint32x4_t) (__a <= 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vclezq_s64 (int64x2_t __a)
{
  return (uint64x2_t) (__a <= __AARCH64_INT64_C (0));
}

/* vclez - scalar.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vclezs_f32 (float32_t __a)
{
  return __a <= 0.0f ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vclezd_s64 (int64_t __a)
{
  return __a <= 0 ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vclezd_f64 (float64_t __a)
{
  return __a <= 0.0 ? -1ll : 0ll;
}

/* vclt - vector.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) (__a < __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_f64 (float64x1_t __a, float64x1_t __b)
{
  return (uint64x1_t) (__a < __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) (__a < __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) (__a < __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) (__a < __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) (__a < __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) (__a < __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) (__a < __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) (__a < __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) (__a < __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) (__a < __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) (__a < __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (__a < __b);
}

/* vclt - scalar.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vclts_f32 (float32_t __a, float32_t __b)
{
  return __a < __b ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_s64 (int64_t __a, int64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_u64 (uint64_t __a, uint64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_f64 (float64_t __a, float64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

/* vcltz - vector.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcltz_f32 (float32x2_t __a)
{
  return (uint32x2_t) (__a < 0.0f);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltz_f64 (float64x1_t __a)
{
  return (uint64x1_t) (__a < (float64x1_t) {0.0});
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcltz_s8 (int8x8_t __a)
{
  return (uint8x8_t) (__a < 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcltz_s16 (int16x4_t __a)
{
  return (uint16x4_t) (__a < 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcltz_s32 (int32x2_t __a)
{
  return (uint32x2_t) (__a < 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltz_s64 (int64x1_t __a)
{
  return (uint64x1_t) (__a < __AARCH64_INT64_C (0));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltzq_f32 (float32x4_t __a)
{
  return (uint32x4_t) (__a < 0.0f);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltzq_f64 (float64x2_t __a)
{
  return (uint64x2_t) (__a < 0.0);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltzq_s8 (int8x16_t __a)
{
  return (uint8x16_t) (__a < 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltzq_s16 (int16x8_t __a)
{
  return (uint16x8_t) (__a < 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltzq_s32 (int32x4_t __a)
{
  return (uint32x4_t) (__a < 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltzq_s64 (int64x2_t __a)
{
  return (uint64x2_t) (__a < __AARCH64_INT64_C (0));
}

/* vcltz - scalar.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcltzs_f32 (float32_t __a)
{
  return __a < 0.0f ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltzd_s64 (int64_t __a)
{
  return __a < 0 ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltzd_f64 (float64_t __a)
{
  return __a < 0.0 ? -1ll : 0ll;
}

/* vcls.  */
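/* Count leading sign bits: for each lane, the number of bits after
   the sign bit that equal the sign bit.  E.g. an int8_t lane holding
   1 (binary 00000001) yields 6.  */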

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcls_s8 (int8x8_t __a)
{
  return __builtin_aarch64_clrsbv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcls_s16 (int16x4_t __a)
{
  return __builtin_aarch64_clrsbv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcls_s32 (int32x2_t __a)
{
  return __builtin_aarch64_clrsbv2si (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclsq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_clrsbv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclsq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_clrsbv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclsq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_clrsbv4si (__a);
}

/* vclz.  */
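/* Count leading zero bits in each lane.  E.g. a uint8_t lane holding
   0x10 (binary 00010000) yields 3.  */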

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vclz_s8 (int8x8_t __a)
{
  return __builtin_aarch64_clzv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vclz_s16 (int16x4_t __a)
{
  return __builtin_aarch64_clzv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vclz_s32 (int32x2_t __a)
{
  return __builtin_aarch64_clzv2si (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclz_u8 (uint8x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_clzv8qi ((int8x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclz_u16 (uint16x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_clzv4hi ((int16x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclz_u32 (uint32x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_clzv2si ((int32x2_t) __a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclzq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_clzv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclzq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_clzv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclzq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_clzv4si (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclzq_u8 (uint8x16_t __a)
{
  return (uint8x16_t) __builtin_aarch64_clzv16qi ((int8x16_t) __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclzq_u16 (uint16x8_t __a)
{
  return (uint16x8_t) __builtin_aarch64_clzv8hi ((int16x8_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclzq_u32 (uint32x4_t __a)
{
  return (uint32x4_t) __builtin_aarch64_clzv4si ((int32x4_t) __a);
}

/* vcnt.  */
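/* Population count: the number of set bits in each byte lane.
   E.g. a lane holding 0xf0 yields 4.  */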

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcnt_p8 (poly8x8_t __a)
{
  return (poly8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcnt_s8 (int8x8_t __a)
{
  return __builtin_aarch64_popcountv8qi (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcnt_u8 (uint8x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcntq_p8 (poly8x16_t __a)
{
  return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcntq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_popcountv16qi (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcntq_u8 (uint8x16_t __a)
{
  return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
}

/* vcvt (double -> float).  */
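/* Narrowing conversions.  The vcvt_high variants convert __b into the
   upper half of the result while keeping __a as the lower half.  */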

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vcvt_f16_f32 (float32x4_t __a)
{
  return __builtin_aarch64_float_truncate_lo_v4hf (__a);
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vcvt_high_f16_f32 (float16x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_float_truncate_hi_v8hf (__a, __b);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_f64 (float64x2_t __a)
{
  return __builtin_aarch64_float_truncate_lo_v2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
}

/* vcvt (float -> double).  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_f32_f16 (float16x4_t __a)
{
  return __builtin_aarch64_float_extend_lo_v4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_f64_f32 (float32x2_t __a)
{
  return __builtin_aarch64_float_extend_lo_v2df (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_high_f32_f16 (float16x8_t __a)
{
  return __builtin_aarch64_vec_unpacks_hi_v8hf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_high_f64_f32 (float32x4_t __a)
{
  return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
}

/* vcvt (<u>int -> float).  */
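/* Lane-wise conversion from (unsigned) integer to the corresponding
   floating-point value.  */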

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtd_f64_s64 (int64_t __a)
{
  return (float64_t) __a;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtd_f64_u64 (uint64_t __a)
{
  return (float64_t) __a;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_f32_s32 (int32_t __a)
{
  return (float32_t) __a;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_f32_u32 (uint32_t __a)
{
  return (float32_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_s32 (int32x2_t __a)
{
  return __builtin_aarch64_floatv2siv2sf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_u32 (uint32x2_t __a)
{
  return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_s32 (int32x4_t __a)
{
  return __builtin_aarch64_floatv4siv4sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_u32 (uint32x4_t __a)
{
  return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_s64 (int64x2_t __a)
{
  return __builtin_aarch64_floatv2div2df (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_u64 (uint64x2_t __a)
{
  return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
}

/* vcvt (float -> <u>int).  */
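/* Lane-wise conversion to integer, rounding toward zero (truncation),
   as in a C cast.  */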

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtd_s64_f64 (float64_t __a)
{
  return (int64_t) __a;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtd_u64_f64 (float64_t __a)
{
  return (uint64_t) __a;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvts_s32_f32 (float32_t __a)
{
  return (int32_t) __a;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvts_u32_f32 (float32_t __a)
{
  return (uint32_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lbtruncv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvt_u32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lbtruncuv2sfv2si_us (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lbtruncv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtq_u32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lbtruncuv4sfv4si_us (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vcvt_s64_f64 (float64x1_t __a)
{
  return (int64x1_t) {vcvtd_s64_f64 (__a[0])};
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcvt_u64_f64 (float64x1_t __a)
{
  return (uint64x1_t) {vcvtd_u64_f64 (__a[0])};
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lbtruncv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtq_u64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lbtruncuv2dfv2di_us (__a);
}

/* vcvta.  */
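/* Convert to integer, rounding to nearest with ties away from
   zero.  */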

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtad_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lrounddfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtad_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lroundudfdi_us (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtas_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtas_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundusfsi_us (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvta_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lroundv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvta_u32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lrounduv2sfv2si_us (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtaq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lroundv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtaq_u32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lrounduv4sfv4si_us (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vcvta_s64_f64 (float64x1_t __a)
{
  return (int64x1_t) {vcvtad_s64_f64 (__a[0])};
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcvta_u64_f64 (float64x1_t __a)
{
  return (uint64x1_t) {vcvtad_u64_f64 (__a[0])};
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtaq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lroundv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtaq_u64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lrounduv2dfv2di_us (__a);
}

/* vcvtm.  */
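/* Convert to integer, rounding toward minus infinity (floor).  */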

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtmd_s64_f64 (float64_t __a)
{
  return __builtin_llfloor (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtmd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfloorudfdi_us (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtms_s32_f32 (float32_t __a)
{
  return __builtin_ifloorf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtms_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfloorusfsi_us (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtm_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfloorv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtm_u32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lflooruv2sfv2si_us (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtmq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfloorv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtmq_u32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lflooruv4sfv4si_us (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vcvtm_s64_f64 (float64x1_t __a)
{
  return (int64x1_t) {vcvtmd_s64_f64 (__a[0])};
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcvtm_u64_f64 (float64x1_t __a)
{
  return (uint64x1_t) {vcvtmd_u64_f64 (__a[0])};
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtmq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfloorv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtmq_u64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lflooruv2dfv2di_us (__a);
}

/* vcvtn.  */
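/* Convert to integer, rounding to nearest with ties to even.  */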

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtnd_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintndfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtnd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintnudfdi_us (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtns_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtns_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnusfsi_us (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtn_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfrintnv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtn_u32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfrintnuv2sfv2si_us (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtnq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfrintnv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtnq_u32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfrintnuv4sfv4si_us (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vcvtn_s64_f64 (float64x1_t __a)
{
  return (int64x1_t) {vcvtnd_s64_f64 (__a[0])};
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcvtn_u64_f64 (float64x1_t __a)
{
  return (uint64x1_t) {vcvtnd_u64_f64 (__a[0])};
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtnq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfrintnv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtnq_u64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfrintnuv2dfv2di_us (__a);
}

/* vcvtp.  */
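/* Convert to integer, rounding toward plus infinity (ceiling).  */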

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtpd_s64_f64 (float64_t __a)
{
  return __builtin_llceil (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtpd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lceiludfdi_us (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtps_s32_f32 (float32_t __a)
{
  return __builtin_iceilf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtps_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lceilusfsi_us (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtp_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lceilv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtp_u32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lceiluv2sfv2si_us (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtpq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lceilv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtpq_u32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lceiluv4sfv4si_us (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vcvtp_s64_f64 (float64x1_t __a)
{
  return (int64x1_t) {vcvtpd_s64_f64 (__a[0])};
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcvtp_u64_f64 (float64x1_t __a)
{
  return (uint64x1_t) {vcvtpd_u64_f64 (__a[0])};
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtpq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lceilv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtpq_u64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lceiluv2dfv2di_us (__a);
}

/* vdup_n.  */
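/* Broadcast a scalar into every lane of the result vector, e.g.
   vdup_n_s32 (7) yields {7, 7}.  */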

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_n_f32 (float32_t __a)
{
  return (float32x2_t) {__a, __a};
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_n_f64 (float64_t __a)
{
  return (float64x1_t) {__a};
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_n_p8 (poly8_t __a)
{
  return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_n_p16 (poly16_t __a)
{
  return (poly16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_n_s8 (int8_t __a)
{
  return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_n_s16 (int16_t __a)
{
  return (int16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_n_s32 (int32_t __a)
{
  return (int32x2_t) {__a, __a};
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_n_s64 (int64_t __a)
{
  return (int64x1_t) {__a};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_n_u8 (uint8_t __a)
{
  return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_n_u16 (uint16_t __a)
{
  return (uint16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_n_u32 (uint32_t __a)
{
  return (uint32x2_t) {__a, __a};
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_n_u64 (uint64_t __a)
{
  return (uint64x1_t) {__a};
}

/* vdupq_n.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_n_f32 (float32_t __a)
{
  return (float32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_n_f64 (float64_t __a)
{
  return (float64x2_t) {__a, __a};
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_n_p8 (uint32_t __a)
{
  return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
                       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_n_p16 (uint32_t __a)
{
  return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_n_s8 (int32_t __a)
{
  return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
                      __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_n_s16 (int32_t __a)
{
  return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_n_s32 (int32_t __a)
{
  return (int32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_n_s64 (int64_t __a)
{
  return (int64x2_t) {__a, __a};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_n_u8 (uint32_t __a)
{
  return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
                       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_n_u16 (uint32_t __a)
{
  return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_n_u32 (uint32_t __a)
{
  return (uint32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_n_u64 (uint64_t __a)
{
  return (uint64x2_t) {__a, __a};
}

/* vdup_lane.  */
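/* Broadcast the lane selected by the constant index __b into every
   lane of the result; __b must be an integer constant expression in
   range for the source vector.  */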

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_f32 (__a, __b);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_f64 (__a, __b);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_p8 (__a, __b);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_p16 (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_s8 (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_s16 (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_s32 (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_s64 (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_u8 (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_u16 (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_u32 (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vdup_lane_u64 (__a, __b);
}

/* vdup_laneq.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_f32 (__a, __b);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_f64 (__a, __b);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_p8 (__a, __b);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_p16 (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s8 (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s16 (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s32 (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_s64 (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u8 (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u16 (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u32 (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vdup_laneq_u64 (__a, __b);
}

/* vdupq_lane.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_f32 (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_f64 (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_p8 (__a, __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_p16 (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s8 (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s16 (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s32 (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_s64 (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u8 (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u16 (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u32 (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vdupq_lane_u64 (__a, __b);
}

/* vdupq_laneq.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_f32 (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_f64 (__a, __b);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_p8 (__a, __b);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_p16 (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s8 (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s16 (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s32 (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_s64 (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u8 (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u16 (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u32 (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vdupq_laneq_u64 (__a, __b);
}

/* vdupb_lane.  */
13987 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vdupb_lane_p8(poly8x8_t __a,const int __b)13988 vdupb_lane_p8 (poly8x8_t __a, const int __b)
13989 {
13990   return __aarch64_vget_lane_any (__a, __b);
13991 }
13992 
13993 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
vdupb_lane_s8(int8x8_t __a,const int __b)13994 vdupb_lane_s8 (int8x8_t __a, const int __b)
13995 {
13996   return __aarch64_vget_lane_any (__a, __b);
13997 }
13998 
13999 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vdupb_lane_u8(uint8x8_t __a,const int __b)14000 vdupb_lane_u8 (uint8x8_t __a, const int __b)
14001 {
14002   return __aarch64_vget_lane_any (__a, __b);
14003 }
14004 
14005 /* vduph_lane  */
14006 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vduph_lane_p16(poly16x4_t __a,const int __b)14007 vduph_lane_p16 (poly16x4_t __a, const int __b)
14008 {
14009   return __aarch64_vget_lane_any (__a, __b);
14010 }
14011 
14012 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
vduph_lane_s16(int16x4_t __a,const int __b)14013 vduph_lane_s16 (int16x4_t __a, const int __b)
14014 {
14015   return __aarch64_vget_lane_any (__a, __b);
14016 }
14017 
14018 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vduph_lane_u16(uint16x4_t __a,const int __b)14019 vduph_lane_u16 (uint16x4_t __a, const int __b)
14020 {
14021   return __aarch64_vget_lane_any (__a, __b);
14022 }
14023 
14024 /* vdups_lane  */
14025 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vdups_lane_f32(float32x2_t __a,const int __b)14026 vdups_lane_f32 (float32x2_t __a, const int __b)
14027 {
14028   return __aarch64_vget_lane_any (__a, __b);
14029 }
14030 
14031 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
vdups_lane_s32(int32x2_t __a,const int __b)14032 vdups_lane_s32 (int32x2_t __a, const int __b)
14033 {
14034   return __aarch64_vget_lane_any (__a, __b);
14035 }
14036 
14037 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vdups_lane_u32(uint32x2_t __a,const int __b)14038 vdups_lane_u32 (uint32x2_t __a, const int __b)
14039 {
14040   return __aarch64_vget_lane_any (__a, __b);
14041 }
14042 
14043 /* vdupd_lane  */
14044 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vdupd_lane_f64(float64x1_t __a,const int __b)14045 vdupd_lane_f64 (float64x1_t __a, const int __b)
14046 {
14047   __AARCH64_LANE_CHECK (__a, __b);
14048   return __a[0];
14049 }
14050 
14051 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
vdupd_lane_s64(int64x1_t __a,const int __b)14052 vdupd_lane_s64 (int64x1_t __a, const int __b)
14053 {
14054   __AARCH64_LANE_CHECK (__a, __b);
14055   return __a[0];
14056 }
14057 
14058 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vdupd_lane_u64(uint64x1_t __a,const int __b)14059 vdupd_lane_u64 (uint64x1_t __a, const int __b)
14060 {
14061   __AARCH64_LANE_CHECK (__a, __b);
14062   return __a[0];
14063 }
14064 
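/* Usage sketch (editorial addition; hypothetical helper name): the scalar
   vdup<b,h,s,d>_lane forms read a single lane back into a scalar, much
   like vget_lane.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
__sketch_first_f32 (float32x2_t __v)
{
  return vdups_lane_f32 (__v, 0);
}
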
/* vdupb_laneq  */
__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vdupb_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vdupb_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vdupb_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

/* vduph_laneq  */
__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vduph_laneq_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vduph_laneq_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vduph_laneq_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

/* vdups_laneq  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vdups_laneq_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vdups_laneq_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vdups_laneq_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

/* vdupd_laneq  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vdupd_laneq_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vdupd_laneq_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vdupd_laneq_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

/* vext  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
#endif
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
  /* The only possible index to the assembler instruction returns element 0.  */
  return __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint8x8_t)
      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a,
      (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint8x8_t)
      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a,
      (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
#endif
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
  /* The only possible index to the assembler instruction returns element 0.  */
  return __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint8x8_t)
      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a,
      (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
#endif
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
  /* The only possible index to the assembler instruction returns element 0.  */
  return __a;
}

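/* Usage sketch (editorial addition; hypothetical helper name): vext_*
   returns a window of consecutive lanes taken from the concatenation of
   its two operands, which makes it useful for sliding a vector over a
   byte stream.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
__sketch_shift_in3_u8 (uint8x8_t __prev, uint8x8_t __next)
{
  /* {__prev[3..7], __next[0..2]}.  */
  return vext_u8 (__prev, __next, 3);
}
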
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a,
      (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint8x16_t)
      {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
       24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
       __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint16x8_t)
      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
  return __builtin_shuffle (__a, __b,
      (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint8x16_t)
      {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
       24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
       __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint16x8_t)
      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
  return __builtin_shuffle (__a, __b,
      (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a,
      (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint8x16_t)
      {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
       24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
       __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint16x8_t)
      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
  return __builtin_shuffle (__a, __b,
      (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a,
      (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
{
  __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
#endif
}

/* vfma  */

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
{
  return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
{
  return __builtin_aarch64_fmav2sf (__b, __c, __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
  return __builtin_aarch64_fmav4sf (__b, __c, __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
{
  return __builtin_aarch64_fmav2df (__b, __c, __a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
{
  return __builtin_aarch64_fmav2sf (__b, vdup_n_f32 (__c), __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
{
  return __builtin_aarch64_fmav4sf (__b, vdupq_n_f32 (__c), __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c)
{
  return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
}

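/* Usage sketch (editorial addition, not part of the original header; the
   helper name is hypothetical): vfma* fuse the multiply and the add into
   one instruction with a single rounding, here acc + x * y per lane.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
__sketch_axpy_f32 (float32x4_t __acc, float32x4_t __x, float32x4_t __y)
{
  /* Typically compiles to a single FMLA.  */
  return vfmaq_f32 (__acc, __x, __y);
}
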
/* vfma_lane  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
	       float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (__b,
				    __aarch64_vdup_lane_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vfma_lane_f64 (float64x1_t __a, float64x1_t __b,
	       float64x1_t __c, const int __lane)
{
  return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmad_lane_f64 (float64_t __a, float64_t __b,
	        float64x1_t __c, const int __lane)
{
  return __builtin_fma (__b, __c[0], __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmas_lane_f32 (float32_t __a, float32_t __b,
	        float32x2_t __c, const int __lane)
{
  return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a);
}

/* vfma_laneq  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
	        float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (__b,
				    __aarch64_vdup_laneq_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vfma_laneq_f64 (float64x1_t __a, float64x1_t __b,
	        float64x2_t __c, const int __lane)
{
  float64_t __c0 = __aarch64_vget_lane_any (__c, __lane);
  return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])};
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmad_laneq_f64 (float64_t __a, float64_t __b,
	         float64x2_t __c, const int __lane)
{
  return __builtin_fma (__b, __aarch64_vget_lane_any (__c, __lane), __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmas_laneq_f32 (float32_t __a, float32_t __b,
		 float32x4_t __c, const int __lane)
{
  return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a);
}

/* vfmaq_lane  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
	        float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (__b,
				    __aarch64_vdupq_lane_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
	        float64x1_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a);
}

/* vfmaq_laneq  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
	         float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (__b,
				    __aarch64_vdupq_laneq_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
	         float64x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (__b,
				    __aarch64_vdupq_laneq_f64 (__c, __lane),
				    __a);
}

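/* Usage sketch (editorial addition; hypothetical helper name): the lane
   forms multiply by one element of a coefficient vector without a separate
   broadcast, e.g. acc + x * coeffs[2].  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
__sketch_madd_by_coeff_f32 (float32x4_t __acc, float32x4_t __x,
			    float32x4_t __coeffs)
{
  /* Typically a single FMLA (by element), keeping __coeffs in one
     register.  */
  return vfmaq_laneq_f32 (__acc, __x, __coeffs, 2);
}
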
/* vfms  */

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
{
  return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
{
  return __builtin_aarch64_fmav2sf (-__b, __c, __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
  return __builtin_aarch64_fmav4sf (-__b, __c, __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
{
  return __builtin_aarch64_fmav2df (-__b, __c, __a);
}

/* vfms_lane  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
	       float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (-__b,
				    __aarch64_vdup_lane_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vfms_lane_f64 (float64x1_t __a, float64x1_t __b,
	       float64x1_t __c, const int __lane)
{
  return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmsd_lane_f64 (float64_t __a, float64_t __b,
	        float64x1_t __c, const int __lane)
{
  return __builtin_fma (-__b, __c[0], __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmss_lane_f32 (float32_t __a, float32_t __b,
	        float32x2_t __c, const int __lane)
{
  return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
}

/* vfms_laneq  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
	        float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (-__b,
				    __aarch64_vdup_laneq_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vfms_laneq_f64 (float64x1_t __a, float64x1_t __b,
	        float64x2_t __c, const int __lane)
{
  float64_t __c0 = __aarch64_vget_lane_any (__c, __lane);
  return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])};
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmsd_laneq_f64 (float64_t __a, float64_t __b,
	         float64x2_t __c, const int __lane)
{
  return __builtin_fma (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmss_laneq_f32 (float32_t __a, float32_t __b,
		 float32x4_t __c, const int __lane)
{
  return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
}

/* vfmsq_lane  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
	        float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (-__b,
				    __aarch64_vdupq_lane_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
	        float64x1_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a);
}

/* vfmsq_laneq  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
	         float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (-__b,
				    __aarch64_vdupq_laneq_f32 (__c, __lane),
				    __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
	         float64x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (-__b,
				    __aarch64_vdupq_laneq_f64 (__c, __lane),
				    __a);
}

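/* Usage sketch (editorial addition; hypothetical helper name): vfms*
   compute a - b * c per lane, fused, e.g. subtracting a correction term
   from an accumulator.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
__sketch_fnma_f32 (float32x4_t __acc, float32x4_t __b, float32x4_t __c)
{
  /* Typically a single FMLS.  */
  return vfmsq_f32 (__acc, __b, __c);
}
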
/* vld1 */

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vld1_f16 (const float16_t *__a)
{
  return __builtin_aarch64_ld1v4hf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t *a)
{
  return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_f64 (const float64_t *a)
{
  return (float64x1_t) {*a};
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_p8 (const poly8_t *a)
{
  return (poly8x8_t)
    __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_p16 (const poly16_t *a)
{
  return (poly16x4_t)
    __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_s8 (const int8_t *a)
{
  return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_s16 (const int16_t *a)
{
  return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_s32 (const int32_t *a)
{
  return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_s64 (const int64_t *a)
{
  return (int64x1_t) {*a};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_u8 (const uint8_t *a)
{
  return (uint8x8_t)
    __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_u16 (const uint16_t *a)
{
  return (uint16x4_t)
    __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_u32 (const uint32_t *a)
{
  return (uint32x2_t)
    __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_u64 (const uint64_t *a)
{
  return (uint64x1_t) {*a};
}

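/* Usage sketch (editorial addition; hypothetical helper name): vld1_*
   loads one 64-bit vector of consecutive elements; only element alignment
   is required.  Here two adjacent pairs are loaded and added lane-wise.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
__sketch_add_pairs_f32 (const float32_t *__p)
{
  /* {p[0] + p[2], p[1] + p[3]}.  */
  return vadd_f32 (vld1_f32 (__p), vld1_f32 (__p + 2));
}
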
/* vld1q */

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vld1q_f16 (const float16_t *__a)
{
  return __builtin_aarch64_ld1v8hf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t *a)
{
  return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_f64 (const float64_t *a)
{
  return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_p8 (const poly8_t *a)
{
  return (poly8x16_t)
    __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_p16 (const poly16_t *a)
{
  return (poly16x8_t)
    __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_s8 (const int8_t *a)
{
  return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_s16 (const int16_t *a)
{
  return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_s32 (const int32_t *a)
{
  return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_s64 (const int64_t *a)
{
  return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_u8 (const uint8_t *a)
{
  return (uint8x16_t)
    __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_u16 (const uint16_t *a)
{
  return (uint16x8_t)
    __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_u32 (const uint32_t *a)
{
  return (uint32x4_t)
    __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_u64 (const uint64_t *a)
{
  return (uint64x2_t)
    __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
}

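/* Usage sketch (editorial addition; hypothetical helper name): a 128-bit
   load followed by a horizontal reduction written with GCC's vector
   indexing (a dedicated vaddvq_f32 reduction also exists on AArch64).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
__sketch_sum4_f32 (const float32_t *__p)
{
  float32x4_t __v = vld1q_f32 (__p);
  return __v[0] + __v[1] + __v[2] + __v[3];
}
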
/* vld1_dup  */

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vld1_dup_f16 (const float16_t* __a)
{
  float16_t __f = *__a;
  return (float16x4_t) { __f, __f, __f, __f };
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_dup_f32 (const float32_t* __a)
{
  return vdup_n_f32 (*__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_dup_f64 (const float64_t* __a)
{
  return vdup_n_f64 (*__a);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_dup_p8 (const poly8_t* __a)
{
  return vdup_n_p8 (*__a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_dup_p16 (const poly16_t* __a)
{
  return vdup_n_p16 (*__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_dup_s8 (const int8_t* __a)
{
  return vdup_n_s8 (*__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_dup_s16 (const int16_t* __a)
{
  return vdup_n_s16 (*__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_dup_s32 (const int32_t* __a)
{
  return vdup_n_s32 (*__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_dup_s64 (const int64_t* __a)
{
  return vdup_n_s64 (*__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_dup_u8 (const uint8_t* __a)
{
  return vdup_n_u8 (*__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_dup_u16 (const uint16_t* __a)
{
  return vdup_n_u16 (*__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_dup_u32 (const uint32_t* __a)
{
  return vdup_n_u32 (*__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_dup_u64 (const uint64_t* __a)
{
  return vdup_n_u64 (*__a);
}

/* vld1q_dup  */

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vld1q_dup_f16 (const float16_t* __a)
{
  float16_t __f = *__a;
  return (float16x8_t) { __f, __f, __f, __f, __f, __f, __f, __f };
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t* __a)
{
  return vdupq_n_f32 (*__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_dup_f64 (const float64_t* __a)
{
  return vdupq_n_f64 (*__a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_dup_p8 (const poly8_t* __a)
{
  return vdupq_n_p8 (*__a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_dup_p16 (const poly16_t* __a)
{
  return vdupq_n_p16 (*__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_dup_s8 (const int8_t* __a)
{
  return vdupq_n_s8 (*__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_dup_s16 (const int16_t* __a)
{
  return vdupq_n_s16 (*__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_dup_s32 (const int32_t* __a)
{
  return vdupq_n_s32 (*__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_dup_s64 (const int64_t* __a)
{
  return vdupq_n_s64 (*__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_dup_u8 (const uint8_t* __a)
{
  return vdupq_n_u8 (*__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_dup_u16 (const uint16_t* __a)
{
  return vdupq_n_u16 (*__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_dup_u32 (const uint32_t* __a)
{
  return vdupq_n_u32 (*__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_dup_u64 (const uint64_t* __a)
{
  return vdupq_n_u64 (*__a);
}

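/* Usage sketch (editorial addition; hypothetical helper name): broadcast-
   load a single scale factor straight from memory (typically an LD1R) and
   apply it to every lane.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
__sketch_scale_f32 (const float32_t *__scale, float32x4_t __v)
{
  return vmulq_f32 (__v, vld1q_dup_f32 (__scale));
}
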
/* vld1_lane  */

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vld1_lane_f16 (const float16_t *__src, float16x4_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_lane_f32 (const float32_t *__src, float32x2_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_lane_p8 (const poly8_t *__src, poly8x8_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_lane_p16 (const poly16_t *__src, poly16x4_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_lane_s8 (const int8_t *__src, int8x8_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_lane_s16 (const int16_t *__src, int16x4_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_lane_s32 (const int32_t *__src, int32x2_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_lane_u8 (const uint8_t *__src, uint8x8_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_lane_u16 (const uint16_t *__src, uint16x4_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_lane_u32 (const uint32_t *__src, uint32x2_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

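/* Usage sketch (editorial addition; hypothetical helper name): vld1_lane
   replaces exactly one lane of an existing vector with a value loaded from
   memory, leaving the other lanes intact.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
__sketch_patch_lane2_s16 (const int16_t *__p, int16x4_t __v)
{
  return vld1_lane_s16 (__p, __v, 2);
}
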
/* vld1q_lane  */

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vld1q_lane_f16 (const float16_t *__src, float16x8_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_lane_f32 (const float32_t *__src, float32x4_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_lane_p8 (const poly8_t *__src, poly8x16_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_lane_p16 (const poly16_t *__src, poly16x8_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_lane_s8 (const int8_t *__src, int8x16_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_lane_s16 (const int16_t *__src, int16x8_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_lane_s32 (const int32_t *__src, int32x4_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_lane_u8 (const uint8_t *__src, uint8x16_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_lane_u16 (const uint16_t *__src, uint16x8_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_lane_u32 (const uint32_t *__src, uint32x4_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

/* vldn */

__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
vld2_s64 (const int64_t * __a)
{
  int64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
vld2_u64 (const uint64_t * __a)
{
  uint64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
vld2_f64 (const float64_t * __a)
{
  float64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
  return ret;
}

__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vld2_s8 (const int8_t * __a)
{
  int8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vld2_p8 (const poly8_t * __a)
{
  poly8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vld2_s16 (const int16_t * __a)
{
  int16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

15265 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vld2_p16(const poly16_t * __a)15266 vld2_p16 (const poly16_t * __a)
15267 {
15268   poly16x4x2_t ret;
15269   __builtin_aarch64_simd_oi __o;
15270   __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15271   ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15272   ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15273   return ret;
15274 }
15275 
15276 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vld2_s32(const int32_t * __a)15277 vld2_s32 (const int32_t * __a)
15278 {
15279   int32x2x2_t ret;
15280   __builtin_aarch64_simd_oi __o;
15281   __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
15282   ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
15283   ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
15284   return ret;
15285 }
15286 
15287 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vld2_u8(const uint8_t * __a)15288 vld2_u8 (const uint8_t * __a)
15289 {
15290   uint8x8x2_t ret;
15291   __builtin_aarch64_simd_oi __o;
15292   __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
15293   ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15294   ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15295   return ret;
15296 }
15297 
15298 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vld2_u16(const uint16_t * __a)15299 vld2_u16 (const uint16_t * __a)
15300 {
15301   uint16x4x2_t ret;
15302   __builtin_aarch64_simd_oi __o;
15303   __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15304   ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15305   ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15306   return ret;
15307 }
15308 
15309 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vld2_u32(const uint32_t * __a)15310 vld2_u32 (const uint32_t * __a)
15311 {
15312   uint32x2x2_t ret;
15313   __builtin_aarch64_simd_oi __o;
15314   __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
15315   ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
15316   ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
15317   return ret;
15318 }
15319 
15320 __extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
vld2_f16(const float16_t * __a)15321 vld2_f16 (const float16_t * __a)
15322 {
15323   float16x4x2_t ret;
15324   __builtin_aarch64_simd_oi __o;
15325   __o = __builtin_aarch64_ld2v4hf (__a);
15326   ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
15327   ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1);
15328   return ret;
15329 }
15330 
15331 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32(const float32_t * __a)15332 vld2_f32 (const float32_t * __a)
15333 {
15334   float32x2x2_t ret;
15335   __builtin_aarch64_simd_oi __o;
15336   __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
15337   ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
15338   ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
15339   return ret;
15340 }
15341 
15342 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vld2q_s8(const int8_t * __a)15343 vld2q_s8 (const int8_t * __a)
15344 {
15345   int8x16x2_t ret;
15346   __builtin_aarch64_simd_oi __o;
15347   __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
15348   ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
15349   ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
15350   return ret;
15351 }
15352 
15353 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vld2q_p8(const poly8_t * __a)15354 vld2q_p8 (const poly8_t * __a)
15355 {
15356   poly8x16x2_t ret;
15357   __builtin_aarch64_simd_oi __o;
15358   __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
15359   ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
15360   ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
15361   return ret;
15362 }
15363 
15364 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vld2q_s16(const int16_t * __a)15365 vld2q_s16 (const int16_t * __a)
15366 {
15367   int16x8x2_t ret;
15368   __builtin_aarch64_simd_oi __o;
15369   __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
15370   ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
15371   ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
15372   return ret;
15373 }
15374 
15375 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vld2q_p16(const poly16_t * __a)15376 vld2q_p16 (const poly16_t * __a)
15377 {
15378   poly16x8x2_t ret;
15379   __builtin_aarch64_simd_oi __o;
15380   __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
15381   ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
15382   ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
15383   return ret;
15384 }
15385 
15386 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vld2q_s32(const int32_t * __a)15387 vld2q_s32 (const int32_t * __a)
15388 {
15389   int32x4x2_t ret;
15390   __builtin_aarch64_simd_oi __o;
15391   __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
15392   ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
15393   ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
15394   return ret;
15395 }
15396 
15397 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
vld2q_s64(const int64_t * __a)15398 vld2q_s64 (const int64_t * __a)
15399 {
15400   int64x2x2_t ret;
15401   __builtin_aarch64_simd_oi __o;
15402   __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
15403   ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
15404   ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
15405   return ret;
15406 }
15407 
15408 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vld2q_u8(const uint8_t * __a)15409 vld2q_u8 (const uint8_t * __a)
15410 {
15411   uint8x16x2_t ret;
15412   __builtin_aarch64_simd_oi __o;
15413   __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
15414   ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
15415   ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
15416   return ret;
15417 }
15418 
15419 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vld2q_u16(const uint16_t * __a)15420 vld2q_u16 (const uint16_t * __a)
15421 {
15422   uint16x8x2_t ret;
15423   __builtin_aarch64_simd_oi __o;
15424   __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
15425   ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
15426   ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
15427   return ret;
15428 }
15429 
15430 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vld2q_u32(const uint32_t * __a)15431 vld2q_u32 (const uint32_t * __a)
15432 {
15433   uint32x4x2_t ret;
15434   __builtin_aarch64_simd_oi __o;
15435   __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
15436   ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
15437   ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
15438   return ret;
15439 }
15440 
15441 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
vld2q_u64(const uint64_t * __a)15442 vld2q_u64 (const uint64_t * __a)
15443 {
15444   uint64x2x2_t ret;
15445   __builtin_aarch64_simd_oi __o;
15446   __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
15447   ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
15448   ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
15449   return ret;
15450 }
15451 
15452 __extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
vld2q_f16(const float16_t * __a)15453 vld2q_f16 (const float16_t * __a)
15454 {
15455   float16x8x2_t ret;
15456   __builtin_aarch64_simd_oi __o;
15457   __o = __builtin_aarch64_ld2v8hf (__a);
15458   ret.val[0] = __builtin_aarch64_get_qregoiv8hf (__o, 0);
15459   ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
15460   return ret;
15461 }
15462 
15463 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32(const float32_t * __a)15464 vld2q_f32 (const float32_t * __a)
15465 {
15466   float32x4x2_t ret;
15467   __builtin_aarch64_simd_oi __o;
15468   __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
15469   ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
15470   ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
15471   return ret;
15472 }
15473 
15474 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
vld2q_f64(const float64_t * __a)15475 vld2q_f64 (const float64_t * __a)
15476 {
15477   float64x2x2_t ret;
15478   __builtin_aarch64_simd_oi __o;
15479   __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
15480   ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
15481   ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
15482   return ret;
15483 }
15484 
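/* A usage sketch for the three-element loads that follow (hypothetical
   names): vld3_u8 reads 24 consecutive bytes and deinterleaves them with
   a stride of three, which suits packed RGB data.

     uint8x8x3_t __rgb = vld3_u8 (__pixels);

   __rgb.val[0] then holds bytes 0, 3, 6, ..., __rgb.val[1] bytes
   1, 4, 7, ... and __rgb.val[2] bytes 2, 5, 8, ...  */
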
__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
vld3_s64 (const int64_t * __a)
{
  int64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}

__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
vld3_u64 (const uint64_t * __a)
{
  uint64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}

__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
vld3_f64 (const float64_t * __a)
{
  float64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
  return ret;
}

__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
vld3_s8 (const int8_t * __a)
{
  int8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
vld3_p8 (const poly8_t * __a)
{
  poly8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
vld3_s16 (const int16_t * __a)
{
  int16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
vld3_p16 (const poly16_t * __a)
{
  poly16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
vld3_s32 (const int32_t * __a)
{
  int32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
  return ret;
}

__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
vld3_u8 (const uint8_t * __a)
{
  uint8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
vld3_u16 (const uint16_t * __a)
{
  uint16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
vld3_u32 (const uint32_t * __a)
{
  uint32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
  return ret;
}

__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
vld3_f16 (const float16_t * __a)
{
  float16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hf (__a);
  ret.val[0] = __builtin_aarch64_get_dregciv4hf (__o, 0);
  ret.val[1] = __builtin_aarch64_get_dregciv4hf (__o, 1);
  ret.val[2] = __builtin_aarch64_get_dregciv4hf (__o, 2);
  return ret;
}

__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_f32 (const float32_t * __a)
{
  float32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
  return ret;
}

__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
vld3q_s8 (const int8_t * __a)
{
  int8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
vld3q_p8 (const poly8_t * __a)
{
  poly8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
vld3q_s16 (const int16_t * __a)
{
  int16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
vld3q_p16 (const poly16_t * __a)
{
  poly16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
vld3q_s32 (const int32_t * __a)
{
  int32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}

__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
vld3q_s64 (const int64_t * __a)
{
  int64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}

__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
vld3q_u8 (const uint8_t * __a)
{
  uint8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
vld3q_u16 (const uint16_t * __a)
{
  uint16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
vld3q_u32 (const uint32_t * __a)
{
  uint32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}

__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
vld3q_u64 (const uint64_t * __a)
{
  uint64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}

__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
vld3q_f16 (const float16_t * __a)
{
  float16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hf (__a);
  ret.val[0] = __builtin_aarch64_get_qregciv8hf (__o, 0);
  ret.val[1] = __builtin_aarch64_get_qregciv8hf (__o, 1);
  ret.val[2] = __builtin_aarch64_get_qregciv8hf (__o, 2);
  return ret;
}

__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
  float32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
  return ret;
}

__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
vld3q_f64 (const float64_t * __a)
{
  float64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
  return ret;
}

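/* A usage sketch for the four-element loads that follow (hypothetical
   names): vld4_u8 reads 32 consecutive bytes and deinterleaves them with
   a stride of four, matching packed RGBA data.

     uint8x8x4_t __rgba = vld4_u8 (__pixels);

   Each of __rgba.val[0] .. __rgba.val[3] then holds every fourth byte,
   starting at byte offsets 0, 1, 2 and 3 respectively.  */
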
__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
vld4_s64 (const int64_t * __a)
{
  int64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
  ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
  return ret;
}

__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
vld4_u64 (const uint64_t * __a)
{
  uint64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
  ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
  return ret;
}

__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
vld4_f64 (const float64_t * __a)
{
  float64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
  ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
  return ret;
}

__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
vld4_s8 (const int8_t * __a)
{
  int8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
vld4_p8 (const poly8_t * __a)
{
  poly8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
vld4_s16 (const int16_t * __a)
{
  int16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
vld4_p16 (const poly16_t * __a)
{
  poly16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
vld4_s32 (const int32_t * __a)
{
  int32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
  ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
  return ret;
}

__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
vld4_u8 (const uint8_t * __a)
{
  uint8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
vld4_u16 (const uint16_t * __a)
{
  uint16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
vld4_u32 (const uint32_t * __a)
{
  uint32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
  ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
  return ret;
}

__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
vld4_f16 (const float16_t * __a)
{
  float16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4hf (__a);
  ret.val[0] = __builtin_aarch64_get_dregxiv4hf (__o, 0);
  ret.val[1] = __builtin_aarch64_get_dregxiv4hf (__o, 1);
  ret.val[2] = __builtin_aarch64_get_dregxiv4hf (__o, 2);
  ret.val[3] = __builtin_aarch64_get_dregxiv4hf (__o, 3);
  return ret;
}

__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_f32 (const float32_t * __a)
{
  float32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
  ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
  return ret;
}

__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
vld4q_s8 (const int8_t * __a)
{
  int8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
vld4q_p8 (const poly8_t * __a)
{
  poly8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
vld4q_s16 (const int16_t * __a)
{
  int16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
vld4q_p16 (const poly16_t * __a)
{
  poly16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
vld4q_s32 (const int32_t * __a)
{
  int32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
  ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
  return ret;
}

__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
vld4q_s64 (const int64_t * __a)
{
  int64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
  ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
  return ret;
}

__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
vld4q_u8 (const uint8_t * __a)
{
  uint8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
vld4q_u16 (const uint16_t * __a)
{
  uint16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
vld4q_u32 (const uint32_t * __a)
{
  uint32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
  ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
  return ret;
}

__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
vld4q_u64 (const uint64_t * __a)
{
  uint64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
  ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
  return ret;
}

__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
vld4q_f16 (const float16_t * __a)
{
  float16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v8hf (__a);
  ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0);
  ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1);
  ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2);
  ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3);
  return ret;
}

__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_f32 (const float32_t * __a)
{
  float32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
  ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
  return ret;
}

__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
vld4q_f64 (const float64_t * __a)
{
  float64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
  ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
  return ret;
}

/* vldn_dup */

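/* The vldN_dup intrinsics load a single N-element structure and broadcast
   each of its elements across all lanes of the corresponding val[] vector.
   A usage sketch with hypothetical names:

     float32x2x2_t __c = vld2_dup_f32 (__coeffs);

   Both lanes of __c.val[0] then equal __coeffs[0], and both lanes of
   __c.val[1] equal __coeffs[1].  */
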
__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vld2_dup_s8 (const int8_t * __a)
{
  int8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vld2_dup_s16 (const int16_t * __a)
{
  int16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vld2_dup_s32 (const int32_t * __a)
{
  int32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
  return ret;
}

__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
vld2_dup_f16 (const float16_t * __a)
{
  float16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a);
  ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
  ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1);
  return ret;
}

__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_dup_f32 (const float32_t * __a)
{
  float32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
  return ret;
}

__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
vld2_dup_f64 (const float64_t * __a)
{
  float64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
  return ret;
}

__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vld2_dup_u8 (const uint8_t * __a)
{
  uint8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vld2_dup_u16 (const uint16_t * __a)
{
  uint16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vld2_dup_u32 (const uint32_t * __a)
{
  uint32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
  return ret;
}

__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vld2_dup_p8 (const poly8_t * __a)
{
  poly8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}

__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vld2_dup_p16 (const poly16_t * __a)
{
  poly16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}

__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
vld2_dup_s64 (const int64_t * __a)
{
  int64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
vld2_dup_u64 (const uint64_t * __a)
{
  uint64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}

__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vld2q_dup_s8 (const int8_t * __a)
{
  int8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vld2q_dup_p8 (const poly8_t * __a)
{
  poly8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vld2q_dup_s16 (const int16_t * __a)
{
  int16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vld2q_dup_p16 (const poly16_t * __a)
{
  poly16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vld2q_dup_s32 (const int32_t * __a)
{
  int32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
  return ret;
}

__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
vld2q_dup_s64 (const int64_t * __a)
{
  int64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
  return ret;
}

__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vld2q_dup_u8 (const uint8_t * __a)
{
  uint8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}

__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vld2q_dup_u16 (const uint16_t * __a)
{
  uint16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}

__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vld2q_dup_u32 (const uint32_t * __a)
{
  uint32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
  return ret;
}

__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
vld2q_dup_u64 (const uint64_t * __a)
{
  uint64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
  return ret;
}

__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
vld2q_dup_f16 (const float16_t * __a)
{
  float16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a);
  ret.val[0] = __builtin_aarch64_get_qregoiv8hf (__o, 0);
  ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
  return ret;
}

16401 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_dup_f32(const float32_t * __a)16402 vld2q_dup_f32 (const float32_t * __a)
16403 {
16404   float32x4x2_t ret;
16405   __builtin_aarch64_simd_oi __o;
16406   __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a);
16407   ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
16408   ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
16409   return ret;
16410 }
16411 
16412 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
vld2q_dup_f64(const float64_t * __a)16413 vld2q_dup_f64 (const float64_t * __a)
16414 {
16415   float64x2x2_t ret;
16416   __builtin_aarch64_simd_oi __o;
16417   __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a);
16418   ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
16419   ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
16420   return ret;
16421 }
16422 
16423 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
vld3_dup_s64(const int64_t * __a)16424 vld3_dup_s64 (const int64_t * __a)
16425 {
16426   int64x1x3_t ret;
16427   __builtin_aarch64_simd_ci __o;
16428   __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
16429   ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16430   ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16431   ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16432   return ret;
16433 }
16434 
16435 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
vld3_dup_u64(const uint64_t * __a)16436 vld3_dup_u64 (const uint64_t * __a)
16437 {
16438   uint64x1x3_t ret;
16439   __builtin_aarch64_simd_ci __o;
16440   __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
16441   ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16442   ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16443   ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16444   return ret;
16445 }
16446 
16447 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
16448 vld3_dup_f64 (const float64_t * __a)
16449 {
16450   float64x1x3_t ret;
16451   __builtin_aarch64_simd_ci __o;
16452   __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a);
16453   ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
16454   ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
16455   ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
16456   return ret;
16457 }
16458 
16459 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
16460 vld3_dup_s8 (const int8_t * __a)
16461 {
16462   int8x8x3_t ret;
16463   __builtin_aarch64_simd_ci __o;
16464   __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16465   ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16466   ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16467   ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16468   return ret;
16469 }
16470 
16471 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
16472 vld3_dup_p8 (const poly8_t * __a)
16473 {
16474   poly8x8x3_t ret;
16475   __builtin_aarch64_simd_ci __o;
16476   __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16477   ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16478   ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16479   ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16480   return ret;
16481 }
16482 
16483 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
16484 vld3_dup_s16 (const int16_t * __a)
16485 {
16486   int16x4x3_t ret;
16487   __builtin_aarch64_simd_ci __o;
16488   __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16489   ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16490   ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16491   ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16492   return ret;
16493 }
16494 
16495 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
16496 vld3_dup_p16 (const poly16_t * __a)
16497 {
16498   poly16x4x3_t ret;
16499   __builtin_aarch64_simd_ci __o;
16500   __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16501   ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16502   ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16503   ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16504   return ret;
16505 }
16506 
16507 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
16508 vld3_dup_s32 (const int32_t * __a)
16509 {
16510   int32x2x3_t ret;
16511   __builtin_aarch64_simd_ci __o;
16512   __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
16513   ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
16514   ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
16515   ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
16516   return ret;
16517 }
16518 
16519 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
16520 vld3_dup_u8 (const uint8_t * __a)
16521 {
16522   uint8x8x3_t ret;
16523   __builtin_aarch64_simd_ci __o;
16524   __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16525   ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16526   ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16527   ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16528   return ret;
16529 }
16530 
16531 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
16532 vld3_dup_u16 (const uint16_t * __a)
16533 {
16534   uint16x4x3_t ret;
16535   __builtin_aarch64_simd_ci __o;
16536   __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16537   ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16538   ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16539   ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16540   return ret;
16541 }
16542 
16543 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
16544 vld3_dup_u32 (const uint32_t * __a)
16545 {
16546   uint32x2x3_t ret;
16547   __builtin_aarch64_simd_ci __o;
16548   __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
16549   ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
16550   ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
16551   ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
16552   return ret;
16553 }
16554 
16555 __extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
16556 vld3_dup_f16 (const float16_t * __a)
16557 {
16558   float16x4x3_t ret;
16559   __builtin_aarch64_simd_ci __o;
16560   __o = __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a);
16561   ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0);
16562   ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1);
16563   ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2);
16564   return ret;
16565 }
16566 
16567 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
16568 vld3_dup_f32 (const float32_t * __a)
16569 {
16570   float32x2x3_t ret;
16571   __builtin_aarch64_simd_ci __o;
16572   __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a);
16573   ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
16574   ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
16575   ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
16576   return ret;
16577 }
16578 
16579 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
16580 vld3q_dup_s8 (const int8_t * __a)
16581 {
16582   int8x16x3_t ret;
16583   __builtin_aarch64_simd_ci __o;
16584   __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16585   ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16586   ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16587   ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16588   return ret;
16589 }
16590 
16591 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
16592 vld3q_dup_p8 (const poly8_t * __a)
16593 {
16594   poly8x16x3_t ret;
16595   __builtin_aarch64_simd_ci __o;
16596   __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16597   ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16598   ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16599   ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16600   return ret;
16601 }
16602 
16603 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
16604 vld3q_dup_s16 (const int16_t * __a)
16605 {
16606   int16x8x3_t ret;
16607   __builtin_aarch64_simd_ci __o;
16608   __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16609   ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16610   ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16611   ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16612   return ret;
16613 }
16614 
16615 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
16616 vld3q_dup_p16 (const poly16_t * __a)
16617 {
16618   poly16x8x3_t ret;
16619   __builtin_aarch64_simd_ci __o;
16620   __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16621   ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16622   ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16623   ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16624   return ret;
16625 }
16626 
16627 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
16628 vld3q_dup_s32 (const int32_t * __a)
16629 {
16630   int32x4x3_t ret;
16631   __builtin_aarch64_simd_ci __o;
16632   __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
16633   ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
16634   ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
16635   ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
16636   return ret;
16637 }
16638 
16639 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
16640 vld3q_dup_s64 (const int64_t * __a)
16641 {
16642   int64x2x3_t ret;
16643   __builtin_aarch64_simd_ci __o;
16644   __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
16645   ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
16646   ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
16647   ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
16648   return ret;
16649 }
16650 
16651 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
16652 vld3q_dup_u8 (const uint8_t * __a)
16653 {
16654   uint8x16x3_t ret;
16655   __builtin_aarch64_simd_ci __o;
16656   __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16657   ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16658   ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16659   ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16660   return ret;
16661 }
16662 
16663 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
16664 vld3q_dup_u16 (const uint16_t * __a)
16665 {
16666   uint16x8x3_t ret;
16667   __builtin_aarch64_simd_ci __o;
16668   __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16669   ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16670   ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16671   ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16672   return ret;
16673 }
16674 
16675 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
16676 vld3q_dup_u32 (const uint32_t * __a)
16677 {
16678   uint32x4x3_t ret;
16679   __builtin_aarch64_simd_ci __o;
16680   __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
16681   ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
16682   ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
16683   ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
16684   return ret;
16685 }
16686 
16687 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
16688 vld3q_dup_u64 (const uint64_t * __a)
16689 {
16690   uint64x2x3_t ret;
16691   __builtin_aarch64_simd_ci __o;
16692   __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
16693   ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
16694   ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
16695   ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
16696   return ret;
16697 }
16698 
16699 __extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
16700 vld3q_dup_f16 (const float16_t * __a)
16701 {
16702   float16x8x3_t ret;
16703   __builtin_aarch64_simd_ci __o;
16704   __o = __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a);
16705   ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0);
16706   ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1);
16707   ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2);
16708   return ret;
16709 }
16710 
16711 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
16712 vld3q_dup_f32 (const float32_t * __a)
16713 {
16714   float32x4x3_t ret;
16715   __builtin_aarch64_simd_ci __o;
16716   __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a);
16717   ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
16718   ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
16719   ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
16720   return ret;
16721 }
16722 
16723 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
16724 vld3q_dup_f64 (const float64_t * __a)
16725 {
16726   float64x2x3_t ret;
16727   __builtin_aarch64_simd_ci __o;
16728   __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a);
16729   ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
16730   ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
16731   ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
16732   return ret;
16733 }
16734 
16735 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
16736 vld4_dup_s64 (const int64_t * __a)
16737 {
16738   int64x1x4_t ret;
16739   __builtin_aarch64_simd_xi __o;
16740   __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
16741   ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
16742   ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
16743   ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
16744   ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
16745   return ret;
16746 }
16747 
16748 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
16749 vld4_dup_u64 (const uint64_t * __a)
16750 {
16751   uint64x1x4_t ret;
16752   __builtin_aarch64_simd_xi __o;
16753   __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
16754   ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
16755   ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
16756   ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
16757   ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
16758   return ret;
16759 }
16760 
16761 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
16762 vld4_dup_f64 (const float64_t * __a)
16763 {
16764   float64x1x4_t ret;
16765   __builtin_aarch64_simd_xi __o;
16766   __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a);
16767   ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
16768   ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
16769   ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
16770   ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
16771   return ret;
16772 }
16773 
16774 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
16775 vld4_dup_s8 (const int8_t * __a)
16776 {
16777   int8x8x4_t ret;
16778   __builtin_aarch64_simd_xi __o;
16779   __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16780   ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16781   ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16782   ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16783   ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16784   return ret;
16785 }
16786 
16787 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
16788 vld4_dup_p8 (const poly8_t * __a)
16789 {
16790   poly8x8x4_t ret;
16791   __builtin_aarch64_simd_xi __o;
16792   __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16793   ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16794   ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16795   ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16796   ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16797   return ret;
16798 }
16799 
16800 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
16801 vld4_dup_s16 (const int16_t * __a)
16802 {
16803   int16x4x4_t ret;
16804   __builtin_aarch64_simd_xi __o;
16805   __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16806   ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16807   ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16808   ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16809   ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16810   return ret;
16811 }
16812 
16813 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
16814 vld4_dup_p16 (const poly16_t * __a)
16815 {
16816   poly16x4x4_t ret;
16817   __builtin_aarch64_simd_xi __o;
16818   __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16819   ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16820   ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16821   ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16822   ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16823   return ret;
16824 }
16825 
16826 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
16827 vld4_dup_s32 (const int32_t * __a)
16828 {
16829   int32x2x4_t ret;
16830   __builtin_aarch64_simd_xi __o;
16831   __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
16832   ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
16833   ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
16834   ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
16835   ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
16836   return ret;
16837 }
16838 
16839 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
16840 vld4_dup_u8 (const uint8_t * __a)
16841 {
16842   uint8x8x4_t ret;
16843   __builtin_aarch64_simd_xi __o;
16844   __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16845   ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16846   ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16847   ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16848   ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16849   return ret;
16850 }
16851 
16852 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
16853 vld4_dup_u16 (const uint16_t * __a)
16854 {
16855   uint16x4x4_t ret;
16856   __builtin_aarch64_simd_xi __o;
16857   __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16858   ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16859   ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16860   ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16861   ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16862   return ret;
16863 }
16864 
16865 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
16866 vld4_dup_u32 (const uint32_t * __a)
16867 {
16868   uint32x2x4_t ret;
16869   __builtin_aarch64_simd_xi __o;
16870   __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
16871   ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
16872   ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
16873   ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
16874   ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
16875   return ret;
16876 }
16877 
16878 __extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
16879 vld4_dup_f16 (const float16_t * __a)
16880 {
16881   float16x4x4_t ret;
16882   __builtin_aarch64_simd_xi __o;
16883   __o = __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a);
16884   ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 0);
16885   ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 1);
16886   ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 2);
16887   ret.val[3] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 3);
16888   return ret;
16889 }
16890 
16891 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
16892 vld4_dup_f32 (const float32_t * __a)
16893 {
16894   float32x2x4_t ret;
16895   __builtin_aarch64_simd_xi __o;
16896   __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a);
16897   ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
16898   ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
16899   ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
16900   ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
16901   return ret;
16902 }
16903 
16904 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
16905 vld4q_dup_s8 (const int8_t * __a)
16906 {
16907   int8x16x4_t ret;
16908   __builtin_aarch64_simd_xi __o;
16909   __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16910   ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16911   ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16912   ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16913   ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16914   return ret;
16915 }
16916 
16917 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
16918 vld4q_dup_p8 (const poly8_t * __a)
16919 {
16920   poly8x16x4_t ret;
16921   __builtin_aarch64_simd_xi __o;
16922   __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16923   ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16924   ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16925   ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16926   ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16927   return ret;
16928 }
16929 
16930 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
16931 vld4q_dup_s16 (const int16_t * __a)
16932 {
16933   int16x8x4_t ret;
16934   __builtin_aarch64_simd_xi __o;
16935   __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16936   ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16937   ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16938   ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16939   ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16940   return ret;
16941 }
16942 
16943 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
16944 vld4q_dup_p16 (const poly16_t * __a)
16945 {
16946   poly16x8x4_t ret;
16947   __builtin_aarch64_simd_xi __o;
16948   __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16949   ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16950   ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16951   ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16952   ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16953   return ret;
16954 }
16955 
16956 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
16957 vld4q_dup_s32 (const int32_t * __a)
16958 {
16959   int32x4x4_t ret;
16960   __builtin_aarch64_simd_xi __o;
16961   __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
16962   ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
16963   ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
16964   ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
16965   ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
16966   return ret;
16967 }
16968 
16969 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
16970 vld4q_dup_s64 (const int64_t * __a)
16971 {
16972   int64x2x4_t ret;
16973   __builtin_aarch64_simd_xi __o;
16974   __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
16975   ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
16976   ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
16977   ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
16978   ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
16979   return ret;
16980 }
16981 
16982 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
16983 vld4q_dup_u8 (const uint8_t * __a)
16984 {
16985   uint8x16x4_t ret;
16986   __builtin_aarch64_simd_xi __o;
16987   __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16988   ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16989   ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16990   ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16991   ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16992   return ret;
16993 }
16994 
16995 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
16996 vld4q_dup_u16 (const uint16_t * __a)
16997 {
16998   uint16x8x4_t ret;
16999   __builtin_aarch64_simd_xi __o;
17000   __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17001   ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17002   ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17003   ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17004   ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17005   return ret;
17006 }
17007 
17008 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
17009 vld4q_dup_u32 (const uint32_t * __a)
17010 {
17011   uint32x4x4_t ret;
17012   __builtin_aarch64_simd_xi __o;
17013   __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
17014   ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
17015   ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
17016   ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
17017   ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
17018   return ret;
17019 }
17020 
17021 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
17022 vld4q_dup_u64 (const uint64_t * __a)
17023 {
17024   uint64x2x4_t ret;
17025   __builtin_aarch64_simd_xi __o;
17026   __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
17027   ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
17028   ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
17029   ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
17030   ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
17031   return ret;
17032 }
17033 
17034 __extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
17035 vld4q_dup_f16 (const float16_t * __a)
17036 {
17037   float16x8x4_t ret;
17038   __builtin_aarch64_simd_xi __o;
17039   __o = __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a);
17040   ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 0);
17041   ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 1);
17042   ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 2);
17043   ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 3);
17044   return ret;
17045 }
17046 
17047 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
17048 vld4q_dup_f32 (const float32_t * __a)
17049 {
17050   float32x4x4_t ret;
17051   __builtin_aarch64_simd_xi __o;
17052   __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a);
17053   ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
17054   ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
17055   ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
17056   ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
17057   return ret;
17058 }
17059 
17060 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
17061 vld4q_dup_f64 (const float64_t * __a)
17062 {
17063   float64x2x4_t ret;
17064   __builtin_aarch64_simd_xi __o;
17065   __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a);
17066   ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
17067   ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
17068   ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
17069   ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
17070   return ret;
17071 }
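
/* Editorial note: the four-way variants follow the same pattern; e.g. a
   single RGBA pixel can be broadcast into four vectors (sketch, with a
   hypothetical `rgba' pointer):

     uint8x16x4_t px = vld4q_dup_u8 (rgba);
     // px.val[0] holds 16 copies of rgba[0], px.val[1] of rgba[1], ...
*/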
17072 
17073 /* vld2_lane */
17074 
17075 #define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode,	   \
17076 			 qmode, ptrmode, funcsuffix, signedtype)	   \
17077 __extension__ static __inline intype __attribute__ ((__always_inline__))   \
17078 vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
17079 {									   \
17080   __builtin_aarch64_simd_oi __o;					   \
17081   largetype __temp;							   \
17082   __temp.val[0] =							   \
17083     vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));	   \
17084   __temp.val[1] =							   \
17085     vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));	   \
17086   __o = __builtin_aarch64_set_qregoi##qmode (__o,			   \
17087 					    (signedtype) __temp.val[0],	   \
17088 					    0);				   \
17089   __o = __builtin_aarch64_set_qregoi##qmode (__o,			   \
17090 					    (signedtype) __temp.val[1],	   \
17091 					    1);				   \
17092   __o =	__builtin_aarch64_ld2_lane##mode (				   \
17093 	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
17094   __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0);	   \
17095   __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1);	   \
17096   return __b;								   \
17097 }
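
/* Editorial note: the 64-bit (d-register) forms generated above must widen
   each half of __b into a 128-bit temporary with vcombine before calling
   the lane builtin, because the ld2_lane builtins operate on q-register
   tuples; only the low 64 bits of each result are read back.  */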
17098 
17099 __LD2_LANE_FUNC (float16x4x2_t, float16x4_t, float16x8x2_t, float16_t, v4hf,
17100 		 v8hf, hf, f16, float16x8_t)
17101 __LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, v4sf,
17102 		 sf, f32, float32x4_t)
17103 __LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, df, v2df,
17104 		 df, f64, float64x2_t)
17105 __LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8,
17106 		 int8x16_t)
17107 __LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi,
17108 		 p16, int16x8_t)
17109 __LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8,
17110 		 int8x16_t)
17111 __LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16,
17112 		 int16x8_t)
17113 __LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32,
17114 		 int32x4_t)
17115 __LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, di, v2di, di, s64,
17116 		 int64x2_t)
17117 __LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8,
17118 		 int8x16_t)
17119 __LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi,
17120 		 u16, int16x8_t)
17121 __LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si,
17122 		 u32, int32x4_t)
17123 __LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di,
17124 		 u64, int64x2_t)
17125 
17126 #undef __LD2_LANE_FUNC
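
/* Editorial note: a hedged usage sketch for the functions generated above.
   vld2_lane reloads lane __c of both halves of __b from two consecutive
   elements at __ptr, leaving all other lanes unchanged:

     int16x4x2_t acc = vld2_dup_s16 (p);    // p: const int16_t *
     acc = vld2_lane_s16 (p + 2, acc, 1);   // lane 1 <- p[2] and p[3]
*/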
17127 
17128 /* vld2q_lane */
17129 
17130 #define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
17131 __extension__ static __inline intype __attribute__ ((__always_inline__))   \
17132 vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17133 {									   \
17134   __builtin_aarch64_simd_oi __o;					   \
17135   intype ret;								   \
17136   __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[0], 0); \
17137   __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[1], 1); \
17138   __o = __builtin_aarch64_ld2_lane##mode (				   \
17139 	(__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);		   \
17140   ret.val[0] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 0);	   \
17141   ret.val[1] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 1);	   \
17142   return ret;								   \
17143 }
17144 
17145 __LD2_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16)
17146 __LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
17147 __LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
17148 __LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
17149 __LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
17150 __LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
17151 __LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
17152 __LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
17153 __LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
17154 __LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
17155 __LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
17156 __LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
17157 __LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)
17158 
17159 #undef __LD2_LANE_FUNC
17160 
17161 /* vld3_lane */
17162 
17163 #define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, mode,	   \
17164 			 qmode, ptrmode, funcsuffix, signedtype)	   \
17165 __extension__ static __inline intype __attribute__ ((__always_inline__))   \
17166 vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
17167 {									   \
17168   __builtin_aarch64_simd_ci __o;					   \
17169   largetype __temp;							   \
17170   __temp.val[0] =							   \
17171     vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));	   \
17172   __temp.val[1] =							   \
17173     vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));	   \
17174   __temp.val[2] =							   \
17175     vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0));	   \
17176   __o = __builtin_aarch64_set_qregci##qmode (__o,			   \
17177 					    (signedtype) __temp.val[0],	   \
17178 					    0);				   \
17179   __o = __builtin_aarch64_set_qregci##qmode (__o,			   \
17180 					    (signedtype) __temp.val[1],	   \
17181 					    1);				   \
17182   __o = __builtin_aarch64_set_qregci##qmode (__o,			   \
17183 					    (signedtype) __temp.val[2],	   \
17184 					    2);				   \
17185   __o =	__builtin_aarch64_ld3_lane##mode (				   \
17186 	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
17187   __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0);	   \
17188   __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1);	   \
17189   __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2);	   \
17190   return __b;								   \
17191 }
17192 
17193 __LD3_LANE_FUNC (float16x4x3_t, float16x4_t, float16x8x3_t, float16_t, v4hf,
17194 		 v8hf, hf, f16, float16x8_t)
17195 __LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf,
17196 		 sf, f32, float32x4_t)
17197 __LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df,
17198 		 df, f64, float64x2_t)
17199 __LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8,
17200 		 int8x16_t)
17201 __LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi,
17202 		 p16, int16x8_t)
17203 __LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
17204 		 int8x16_t)
17205 __LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
17206 		 int16x8_t)
17207 __LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v2si, v4si, si, s32,
17208 		 int32x4_t)
17209 __LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, di, v2di, di, s64,
17210 		 int64x2_t)
17211 __LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8,
17212 		 int8x16_t)
17213 __LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi,
17214 		 u16, int16x8_t)
17215 __LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, si,
17216 		 u32, int32x4_t)
17217 __LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di,
17218 		 u64, int64x2_t)
17219 
17220 #undef __LD3_LANE_FUNC
17221 
17222 /* vld3q_lane */
17223 
17224 #define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
17225 __extension__ static __inline intype __attribute__ ((__always_inline__))   \
17226 vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17227 {									   \
17228   __builtin_aarch64_simd_ci __o;					   \
17229   intype ret;								   \
17230   __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); \
17231   __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); \
17232   __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); \
17233   __o = __builtin_aarch64_ld3_lane##mode (				   \
17234 	(__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);		   \
17235   ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0);	   \
17236   ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1);	   \
17237   ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2);	   \
17238   return ret;								   \
17239 }
17240 
17241 __LD3_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
17242 __LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
17243 __LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
17244 __LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
17245 __LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
17246 __LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
17247 __LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
17248 __LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
17249 __LD3_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
17250 __LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
17251 __LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
17252 __LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
17253 __LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)
17254 
17255 #undef __LD3_LANE_FUNC
17256 
17257 /* vld4_lane */
17258 
17259 #define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, mode,	   \
17260 			 qmode, ptrmode, funcsuffix, signedtype)	   \
17261 __extension__ static __inline intype __attribute__ ((__always_inline__))   \
17262 vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
17263 {									   \
17264   __builtin_aarch64_simd_xi __o;					   \
17265   largetype __temp;							   \
17266   __temp.val[0] =							   \
17267     vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));	   \
17268   __temp.val[1] =							   \
17269     vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));	   \
17270   __temp.val[2] =							   \
17271     vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0));	   \
17272   __temp.val[3] =							   \
17273     vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0));	   \
17274   __o = __builtin_aarch64_set_qregxi##qmode (__o,			   \
17275 					    (signedtype) __temp.val[0],	   \
17276 					    0);				   \
17277   __o = __builtin_aarch64_set_qregxi##qmode (__o,			   \
17278 					    (signedtype) __temp.val[1],	   \
17279 					    1);				   \
17280   __o = __builtin_aarch64_set_qregxi##qmode (__o,			   \
17281 					    (signedtype) __temp.val[2],	   \
17282 					    2);				   \
17283   __o = __builtin_aarch64_set_qregxi##qmode (__o,			   \
17284 					    (signedtype) __temp.val[3],	   \
17285 					    3);				   \
17286   __o =	__builtin_aarch64_ld4_lane##mode (				   \
17287 	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
17288   __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0);	   \
17289   __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1);	   \
17290   __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2);	   \
17291   __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3);	   \
17292   return __b;								   \
17293 }
17294 
17297 __LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v4hf,
17298 		 v8hf, hf, f16, float16x8_t)
17299 __LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf,
17300 		 sf, f32, float32x4_t)
17301 __LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df,
17302 		 df, f64, float64x2_t)
17303 __LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8,
17304 		 int8x16_t)
17305 __LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi,
17306 		 p16, int16x8_t)
17307 __LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8,
17308 		 int8x16_t)
17309 __LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16,
17310 		 int16x8_t)
17311 __LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v2si, v4si, si, s32,
17312 		 int32x4_t)
17313 __LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, di, v2di, di, s64,
17314 		 int64x2_t)
17315 __LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8,
17316 		 int8x16_t)
17317 __LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi,
17318 		 u16, int16x8_t)
17319 __LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, si,
17320 		 u32, int32x4_t)
17321 __LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di,
17322 		 u64, int64x2_t)
17323 
17324 #undef __LD4_LANE_FUNC
17325 
17326 /* vld4q_lane */
17327 
17328 #define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
17329 __extension__ static __inline intype __attribute__ ((__always_inline__))   \
17330 vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17331 {									   \
17332   __builtin_aarch64_simd_xi __o;					   \
17333   intype ret;								   \
17334   __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); \
17335   __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); \
17336   __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); \
17337   __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); \
17338   __o = __builtin_aarch64_ld4_lane##mode (				   \
17339 	(__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);		   \
17340   ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0);	   \
17341   ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1);	   \
17342   ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2);	   \
17343   ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3);	   \
17344   return ret;								   \
17345 }
17346 
17347 __LD4_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16)
17348 __LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
17349 __LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
17350 __LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
17351 __LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
17352 __LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
17353 __LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
17354 __LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
17355 __LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
17356 __LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
17357 __LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
17358 __LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
17359 __LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)
17360 
17361 #undef __LD4_LANE_FUNC
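
/* Editorial note: a hedged vld4q_lane usage sketch, with a hypothetical
   `p' pointing at at least eight floats:

     float32x4x4_t m = vld4q_dup_f32 (p);   // seed all four vectors
     m = vld4q_lane_f32 (p + 4, m, 2);      // lane 2 <- p[4] .. p[7]
*/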
17362 
17363 /* vmax */
17364 
17365 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17366 vmax_f32 (float32x2_t __a, float32x2_t __b)
17367 {
17368   return __builtin_aarch64_smax_nanv2sf (__a, __b);
17369 }
17370 
17371 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17372 vmax_s8 (int8x8_t __a, int8x8_t __b)
17373 {
17374   return __builtin_aarch64_smaxv8qi (__a, __b);
17375 }
17376 
17377 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17378 vmax_s16 (int16x4_t __a, int16x4_t __b)
17379 {
17380   return __builtin_aarch64_smaxv4hi (__a, __b);
17381 }
17382 
17383 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17384 vmax_s32 (int32x2_t __a, int32x2_t __b)
17385 {
17386   return __builtin_aarch64_smaxv2si (__a, __b);
17387 }
17388 
17389 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17390 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
17391 {
17392   return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
17393 						 (int8x8_t) __b);
17394 }
17395 
17396 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17397 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
17398 {
17399   return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
17400 						  (int16x4_t) __b);
17401 }
17402 
17403 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17404 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
17405 {
17406   return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
17407 						  (int32x2_t) __b);
17408 }
17409 
17410 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17411 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
17412 {
17413   return __builtin_aarch64_smax_nanv4sf (__a, __b);
17414 }
17415 
17416 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17417 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
17418 {
17419   return __builtin_aarch64_smax_nanv2df (__a, __b);
17420 }
17421 
17422 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17423 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
17424 {
17425   return __builtin_aarch64_smaxv16qi (__a, __b);
17426 }
17427 
17428 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17429 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
17430 {
17431   return __builtin_aarch64_smaxv8hi (__a, __b);
17432 }
17433 
17434 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17435 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
17436 {
17437   return __builtin_aarch64_smaxv4si (__a, __b);
17438 }
17439 
17440 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17441 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
17442 {
17443   return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
17444 						   (int8x16_t) __b);
17445 }
17446 
17447 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17448 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
17449 {
17450   return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
17451 						  (int16x8_t) __b);
17452 }
17453 
17454 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17455 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
17456 {
17457   return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
17458 						  (int32x4_t) __b);
17459 }
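
/* Editorial note: the floating-point vmax variants above call "_nan"
   builtins, i.e. FMAX semantics: if either input lane is NaN the result
   lane is NaN.  The separate vmaxnm* intrinsics instead prefer the
   numeric operand.  Sketch:

     float32x2_t m = vmax_f32 (a, b);   // per-lane, NaN-propagating max
*/
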
17460 /* vmulx */
17461 
17462 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17463 vmulx_f32 (float32x2_t __a, float32x2_t __b)
17464 {
17465   return __builtin_aarch64_fmulxv2sf (__a, __b);
17466 }
17467 
17468 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17469 vmulxq_f32 (float32x4_t __a, float32x4_t __b)
17470 {
17471   return __builtin_aarch64_fmulxv4sf (__a, __b);
17472 }
17473 
17474 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17475 vmulx_f64 (float64x1_t __a, float64x1_t __b)
17476 {
17477   return (float64x1_t) {__builtin_aarch64_fmulxdf (__a[0], __b[0])};
17478 }
17479 
17480 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17481 vmulxq_f64 (float64x2_t __a, float64x2_t __b)
17482 {
17483   return __builtin_aarch64_fmulxv2df (__a, __b);
17484 }
17485 
17486 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17487 vmulxs_f32 (float32_t __a, float32_t __b)
17488 {
17489   return __builtin_aarch64_fmulxsf (__a, __b);
17490 }
17491 
17492 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17493 vmulxd_f64 (float64_t __a, float64_t __b)
17494 {
17495   return __builtin_aarch64_fmulxdf (__a, __b);
17496 }
17497 
17498 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17499 vmulx_lane_f32 (float32x2_t __a, float32x2_t __v, const int __lane)
17500 {
17501   return vmulx_f32 (__a, __aarch64_vdup_lane_f32 (__v, __lane));
17502 }
17503 
17504 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17505 vmulx_lane_f64 (float64x1_t __a, float64x1_t __v, const int __lane)
17506 {
17507   return vmulx_f64 (__a, __aarch64_vdup_lane_f64 (__v, __lane));
17508 }
17509 
17510 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17511 vmulxq_lane_f32 (float32x4_t __a, float32x2_t __v, const int __lane)
17512 {
17513   return vmulxq_f32 (__a, __aarch64_vdupq_lane_f32 (__v, __lane));
17514 }
17515 
17516 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17517 vmulxq_lane_f64 (float64x2_t __a, float64x1_t __v, const int __lane)
17518 {
17519   return vmulxq_f64 (__a, __aarch64_vdupq_lane_f64 (__v, __lane));
17520 }
17521 
17522 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17523 vmulx_laneq_f32 (float32x2_t __a, float32x4_t __v, const int __lane)
17524 {
17525   return vmulx_f32 (__a, __aarch64_vdup_laneq_f32 (__v, __lane));
17526 }
17527 
17528 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17529 vmulx_laneq_f64 (float64x1_t __a, float64x2_t __v, const int __lane)
17530 {
17531   return vmulx_f64 (__a, __aarch64_vdup_laneq_f64 (__v, __lane));
17532 }
17533 
17534 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17535 vmulxq_laneq_f32 (float32x4_t __a, float32x4_t __v, const int __lane)
17536 {
17537   return vmulxq_f32 (__a, __aarch64_vdupq_laneq_f32 (__v, __lane));
17538 }
17539 
17540 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17541 vmulxq_laneq_f64 (float64x2_t __a, float64x2_t __v, const int __lane)
17542 {
17543   return vmulxq_f64 (__a, __aarch64_vdupq_laneq_f64 (__v, __lane));
17544 }
17545 
17546 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17547 vmulxs_lane_f32 (float32_t __a, float32x2_t __v, const int __lane)
17548 {
17549   return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane));
17550 }
17551 
17552 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17553 vmulxs_laneq_f32 (float32_t __a, float32x4_t __v, const int __lane)
17554 {
17555   return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane));
17556 }
17557 
17558 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17559 vmulxd_lane_f64 (float64_t __a, float64x1_t __v, const int __lane)
17560 {
17561   return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane));
17562 }
17563 
17564 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17565 vmulxd_laneq_f64 (float64_t __a, float64x2_t __v, const int __lane)
17566 {
17567   return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane));
17568 }
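
/* Editorial note: vmulx* map to FMULX, which behaves like an ordinary
   multiply except that (±0 × ±Inf) yields ±2.0 rather than NaN, the
   property needed by reciprocal and reciprocal-square-root step
   iterations.  A hedged sketch (INFINITY from <math.h>):

     float32_t r = vmulxs_f32 (0.0f, INFINITY);   // r == 2.0f
*/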
17569 
17570 /* vpmax  */
17571 
17572 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17573 vpmax_s8 (int8x8_t a, int8x8_t b)
17574 {
17575   return __builtin_aarch64_smaxpv8qi (a, b);
17576 }
17577 
17578 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17579 vpmax_s16 (int16x4_t a, int16x4_t b)
17580 {
17581   return __builtin_aarch64_smaxpv4hi (a, b);
17582 }
17583 
17584 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17585 vpmax_s32 (int32x2_t a, int32x2_t b)
17586 {
17587   return __builtin_aarch64_smaxpv2si (a, b);
17588 }
17589 
17590 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17591 vpmax_u8 (uint8x8_t a, uint8x8_t b)
17592 {
17593   return (uint8x8_t) __builtin_aarch64_umaxpv8qi ((int8x8_t) a,
17594 						  (int8x8_t) b);
17595 }
17596 
17597 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmax_u16(uint16x4_t a,uint16x4_t b)17598 vpmax_u16 (uint16x4_t a, uint16x4_t b)
17599 {
17600   return (uint16x4_t) __builtin_aarch64_umaxpv4hi ((int16x4_t) a,
17601 						   (int16x4_t) b);
17602 }
17603 
17604 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmax_u32(uint32x2_t a,uint32x2_t b)17605 vpmax_u32 (uint32x2_t a, uint32x2_t b)
17606 {
17607   return (uint32x2_t) __builtin_aarch64_umaxpv2si ((int32x2_t) a,
17608 						   (int32x2_t) b);
17609 }
17610 
17611 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpmaxq_s8(int8x16_t a,int8x16_t b)17612 vpmaxq_s8 (int8x16_t a, int8x16_t b)
17613 {
17614   return __builtin_aarch64_smaxpv16qi (a, b);
17615 }
17616 
17617 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpmaxq_s16(int16x8_t a,int16x8_t b)17618 vpmaxq_s16 (int16x8_t a, int16x8_t b)
17619 {
17620   return __builtin_aarch64_smaxpv8hi (a, b);
17621 }
17622 
17623 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpmaxq_s32(int32x4_t a,int32x4_t b)17624 vpmaxq_s32 (int32x4_t a, int32x4_t b)
17625 {
17626   return __builtin_aarch64_smaxpv4si (a, b);
17627 }
17628 
17629 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpmaxq_u8(uint8x16_t a,uint8x16_t b)17630 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
17631 {
17632   return (uint8x16_t) __builtin_aarch64_umaxpv16qi ((int8x16_t) a,
17633 						    (int8x16_t) b);
17634 }
17635 
17636 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpmaxq_u16(uint16x8_t a,uint16x8_t b)17637 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
17638 {
17639   return (uint16x8_t) __builtin_aarch64_umaxpv8hi ((int16x8_t) a,
17640 						   (int16x8_t) b);
17641 }
17642 
17643 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpmaxq_u32(uint32x4_t a,uint32x4_t b)17644 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
17645 {
17646   return (uint32x4_t) __builtin_aarch64_umaxpv4si ((int32x4_t) a,
17647 						   (int32x4_t) b);
17648 }
17649 
17650 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmax_f32(float32x2_t a,float32x2_t b)17651 vpmax_f32 (float32x2_t a, float32x2_t b)
17652 {
17653   return __builtin_aarch64_smax_nanpv2sf (a, b);
17654 }
17655 
17656 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxq_f32(float32x4_t a,float32x4_t b)17657 vpmaxq_f32 (float32x4_t a, float32x4_t b)
17658 {
17659   return __builtin_aarch64_smax_nanpv4sf (a, b);
17660 }
17661 
17662 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxq_f64(float64x2_t a,float64x2_t b)17663 vpmaxq_f64 (float64x2_t a, float64x2_t b)
17664 {
17665   return __builtin_aarch64_smax_nanpv2df (a, b);
17666 }
17667 
17668 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxqd_f64(float64x2_t a)17669 vpmaxqd_f64 (float64x2_t a)
17670 {
17671   return __builtin_aarch64_reduc_smax_nan_scal_v2df (a);
17672 }
17673 
17674 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxs_f32(float32x2_t a)17675 vpmaxs_f32 (float32x2_t a)
17676 {
17677   return __builtin_aarch64_reduc_smax_nan_scal_v2sf (a);
17678 }
17679 
17680 /* vpmaxnm  */
17681 
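/* As vpmax, but pairs are combined with the IEEE 754-2008 maxNum
   rule, so a pair consisting of a quiet NaN and a number yields the
   number rather than NaN.  */
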
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmaxnm_f32 (float32x2_t a, float32x2_t b)
{
  return __builtin_aarch64_smaxpv2sf (a, b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
{
  return __builtin_aarch64_smaxpv4sf (a, b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
{
  return __builtin_aarch64_smaxpv2df (a, b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxnmqd_f64 (float64x2_t a)
{
  return __builtin_aarch64_reduc_smax_scal_v2df (a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxnms_f32 (float32x2_t a)
{
  return __builtin_aarch64_reduc_smax_scal_v2sf (a);
}

/* vpmin  */

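/* Pairwise minimum, with the same lane pairing as vpmax above.  An
   illustrative sketch (names hypothetical):

     uint16x4_t __r = vpmin_u16 (__a2, __b2);
     -- __r[0] == min (__a2[0], __a2[1]),
        __r[2] == min (__b2[0], __b2[1]).  */
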
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmin_s8 (int8x8_t a, int8x8_t b)
{
  return __builtin_aarch64_sminpv8qi (a, b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmin_s16 (int16x4_t a, int16x4_t b)
{
  return __builtin_aarch64_sminpv4hi (a, b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmin_s32 (int32x2_t a, int32x2_t b)
{
  return __builtin_aarch64_sminpv2si (a, b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmin_u8 (uint8x8_t a, uint8x8_t b)
{
  return (uint8x8_t) __builtin_aarch64_uminpv8qi ((int8x8_t) a,
						  (int8x8_t) b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmin_u16 (uint16x4_t a, uint16x4_t b)
{
  return (uint16x4_t) __builtin_aarch64_uminpv4hi ((int16x4_t) a,
						   (int16x4_t) b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmin_u32 (uint32x2_t a, uint32x2_t b)
{
  return (uint32x2_t) __builtin_aarch64_uminpv2si ((int32x2_t) a,
						   (int32x2_t) b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpminq_s8 (int8x16_t a, int8x16_t b)
{
  return __builtin_aarch64_sminpv16qi (a, b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpminq_s16 (int16x8_t a, int16x8_t b)
{
  return __builtin_aarch64_sminpv8hi (a, b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpminq_s32 (int32x4_t a, int32x4_t b)
{
  return __builtin_aarch64_sminpv4si (a, b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpminq_u8 (uint8x16_t a, uint8x16_t b)
{
  return (uint8x16_t) __builtin_aarch64_uminpv16qi ((int8x16_t) a,
						    (int8x16_t) b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpminq_u16 (uint16x8_t a, uint16x8_t b)
{
  return (uint16x8_t) __builtin_aarch64_uminpv8hi ((int16x8_t) a,
						   (int16x8_t) b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpminq_u32 (uint32x4_t a, uint32x4_t b)
{
  return (uint32x4_t) __builtin_aarch64_uminpv4si ((int32x4_t) a,
						   (int32x4_t) b);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmin_f32 (float32x2_t a, float32x2_t b)
{
  return __builtin_aarch64_smin_nanpv2sf (a, b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminq_f32 (float32x4_t a, float32x4_t b)
{
  return __builtin_aarch64_smin_nanpv4sf (a, b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminq_f64 (float64x2_t a, float64x2_t b)
{
  return __builtin_aarch64_smin_nanpv2df (a, b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminqd_f64 (float64x2_t a)
{
  return __builtin_aarch64_reduc_smin_nan_scal_v2df (a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmins_f32 (float32x2_t a)
{
  return __builtin_aarch64_reduc_smin_nan_scal_v2sf (a);
}

/* vpminnm  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpminnm_f32 (float32x2_t a, float32x2_t b)
{
  return __builtin_aarch64_sminpv2sf (a, b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminnmq_f32 (float32x4_t a, float32x4_t b)
{
  return __builtin_aarch64_sminpv4sf (a, b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminnmq_f64 (float64x2_t a, float64x2_t b)
{
  return __builtin_aarch64_sminpv2df (a, b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminnmqd_f64 (float64x2_t a)
{
  return __builtin_aarch64_reduc_smin_scal_v2df (a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpminnms_f32 (float32x2_t a)
{
  return __builtin_aarch64_reduc_smin_scal_v2sf (a);
}

/* vmaxnm  */

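/* Element-wise maximum with maxNum (FMAXNM) semantics: when exactly
   one element of a pair is a quiet NaN, the numeric element is
   returned, so NaNs do not propagate as they do through vmax.  */
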
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_fmaxv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_fmaxv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_fmaxv2df (__a, __b);
}

/* vmaxv  */

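/* Across-lanes reductions: vmaxv returns the maximum over all lanes
   of its single vector argument.  An illustrative sketch (the name
   __v is hypothetical):

     int8_t __m = vmaxv_s8 (__v);   -- maximum of __v[0..7].  */
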
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxv_f32 (float32x2_t __a)
{
  return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxv_s8 (int8x8_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v8qi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxv_s16 (int16x4_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v4hi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxv_s32 (int32x2_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v2si (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxv_u8 (uint8x8_t __a)
{
  return __builtin_aarch64_reduc_umax_scal_v8qi_uu (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxv_u16 (uint16x4_t __a)
{
  return __builtin_aarch64_reduc_umax_scal_v4hi_uu (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxv_u32 (uint32x2_t __a)
{
  return __builtin_aarch64_reduc_umax_scal_v2si_uu (__a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxvq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_reduc_smax_nan_scal_v4sf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxvq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxvq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v16qi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxvq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v8hi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxvq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v4si (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxvq_u8 (uint8x16_t __a)
{
  return __builtin_aarch64_reduc_umax_scal_v16qi_uu (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxvq_u16 (uint16x8_t __a)
{
  return __builtin_aarch64_reduc_umax_scal_v8hi_uu (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxvq_u32 (uint32x4_t __a)
{
  return __builtin_aarch64_reduc_umax_scal_v4si_uu (__a);
}

/* vmaxnmv  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmv_f32 (float32x2_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v2sf (__a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmvq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v4sf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxnmvq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v2df (__a);
}

/* vmin  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmin_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_smin_nanv2sf (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmin_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sminv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmin_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sminv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmin_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sminv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmin_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmin_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmin_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vminq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_smin_nanv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vminq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_smin_nanv2df (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vminq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sminv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vminq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sminv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vminq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sminv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vminq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vminq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vminq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}

/* vminnm  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vminnm_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_fminv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vminnmq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_fminv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vminnmq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_fminv2df (__a, __b);
}

/* vminv  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminv_f32 (float32x2_t __a)
{
  return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vminv_s8 (int8x8_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v8qi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vminv_s16 (int16x4_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v4hi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vminv_s32 (int32x2_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v2si (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vminv_u8 (uint8x8_t __a)
{
  return __builtin_aarch64_reduc_umin_scal_v8qi_uu (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vminv_u16 (uint16x4_t __a)
{
  return __builtin_aarch64_reduc_umin_scal_v4hi_uu (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vminv_u32 (uint32x2_t __a)
{
  return __builtin_aarch64_reduc_umin_scal_v2si_uu (__a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminvq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_reduc_smin_nan_scal_v4sf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vminvq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_reduc_smin_nan_scal_v2df (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vminvq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v16qi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vminvq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v8hi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vminvq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v4si (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vminvq_u8 (uint8x16_t __a)
{
  return __builtin_aarch64_reduc_umin_scal_v16qi_uu (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vminvq_u16 (uint16x8_t __a)
{
  return __builtin_aarch64_reduc_umin_scal_v8hi_uu (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vminvq_u32 (uint32x4_t __a)
{
  return __builtin_aarch64_reduc_umin_scal_v4si_uu (__a);
}

/* vminnmv  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminnmv_f32 (float32x2_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v2sf (__a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminnmvq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v4sf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vminnmvq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v2df (__a);
}

/* vmla */

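/* Multiply-accumulate, a + b * c, expressed as a plain C multiply and
   add; whether it is contracted into a fused multiply-add is left to
   the compiler's floating-point contraction rules.  The vfma
   intrinsics request fusion explicitly.  */
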
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
{
  return a + b * c;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
{
  return __a + __b * __c;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
{
  return a + b * c;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
{
  return a + b * c;
}

/* vmla_lane  */

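/* Lane forms of vmla: the multiplier is a single lane of the last
   vector operand, broadcast across the whole multiply.  An
   illustrative sketch (names hypothetical):

     int16x4_t __r = vmla_lane_s16 (__acc, __x, __v, 2);
     -- __r[i] == __acc[i] + __x[i] * __v[2].  */
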
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
	       float32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
	       int16x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
	       int32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
	       uint16x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
	       uint32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

/* vmla_laneq  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
		float32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
		int16x8_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
		int32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
		uint16x8_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
		uint32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

/* vmlaq_lane  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
		float32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
		int16x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
		int32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
		uint16x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
		uint32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

/* vmlaq_laneq  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
		 float32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
		 int16x8_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
		 int32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
		 uint16x8_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
		 uint32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

/* vmls  */

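/* Multiply-subtract, a - b * c; the _lane and _laneq forms below
   mirror those of vmla, and the same note about floating-point
   contraction applies.  */
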
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
{
  return a - b * c;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
{
  return __a - __b * __c;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
{
  return a - b * c;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
{
  return a - b * c;
}

/* vmls_lane  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
	       float32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
	       int16x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
	       int32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
	       uint16x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
	       uint32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

/* vmls_laneq  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
		float32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
		int16x8_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
		int32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
		uint16x8_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
		uint32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

/* vmlsq_lane  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
		float32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
		int16x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
		int32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
		uint16x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
		uint32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

/* vmlsq_laneq  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
		 float32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
		 int16x8_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
		 int32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
		 uint16x8_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
		 uint32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

/* vmov_n_  */

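/* vmov_n_* broadcasts a scalar into every lane and is equivalent to
   the corresponding vdup_n_* intrinsic.  An illustrative sketch:

     uint32x4_t __ones = vmovq_n_u32 (1);   -- {1, 1, 1, 1}.  */
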
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmov_n_f32 (float32_t __a)
{
  return vdup_n_f32 (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmov_n_f64 (float64_t __a)
{
  return (float64x1_t) {__a};
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmov_n_p8 (poly8_t __a)
{
  return vdup_n_p8 (__a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vmov_n_p16 (poly16_t __a)
{
  return vdup_n_p16 (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmov_n_s8 (int8_t __a)
{
  return vdup_n_s8 (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmov_n_s16 (int16_t __a)
{
  return vdup_n_s16 (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmov_n_s32 (int32_t __a)
{
  return vdup_n_s32 (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vmov_n_s64 (int64_t __a)
{
  return (int64x1_t) {__a};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmov_n_u8 (uint8_t __a)
{
  return vdup_n_u8 (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmov_n_u16 (uint16_t __a)
{
  return vdup_n_u16 (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmov_n_u32 (uint32_t __a)
{
  return vdup_n_u32 (__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vmov_n_u64 (uint64_t __a)
{
  return (uint64x1_t) {__a};
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmovq_n_f32 (float32_t __a)
{
  return vdupq_n_f32 (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmovq_n_f64 (float64_t __a)
{
  return vdupq_n_f64 (__a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmovq_n_p8 (poly8_t __a)
{
  return vdupq_n_p8 (__a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmovq_n_p16 (poly16_t __a)
{
  return vdupq_n_p16 (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmovq_n_s8 (int8_t __a)
{
  return vdupq_n_s8 (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovq_n_s16 (int16_t __a)
{
  return vdupq_n_s16 (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovq_n_s32 (int32_t __a)
{
  return vdupq_n_s32 (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmovq_n_s64 (int64_t __a)
{
  return vdupq_n_s64 (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmovq_n_u8 (uint8_t __a)
{
  return vdupq_n_u8 (__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovq_n_u16 (uint16_t __a)
{
  return vdupq_n_u16 (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovq_n_u32 (uint32_t __a)
{
  return vdupq_n_u32 (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmovq_n_u64 (uint64_t __a)
{
  return vdupq_n_u64 (__a);
}

/* vmul_lane  */

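/* Multiply by a selected lane: every element of the first argument is
   multiplied by lane __lane of the second.  An illustrative sketch
   (names hypothetical):

     float32x2_t __r = vmul_lane_f32 (__x, __v, 1);
     -- __r[i] == __x[i] * __v[1].  */
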
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
{
  return __a * __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

/* vmuld_lane  */

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

/* vmuls_lane  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

/* vmul_laneq  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

/* vmul_n  */

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmul_n_f64 (float64x1_t __a, float64_t __b)
{
  return (float64x1_t) { vget_lane_f64 (__a, 0) * __b };
}

/* vmulq_lane  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
{
  __AARCH64_LANE_CHECK (__a, __lane);
  return __a * __b[0];
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

/* vmulq_laneq  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

/* vneg  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vneg_f32 (float32x2_t __a)
{
  return -__a;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vneg_f64 (float64x1_t __a)
{
  return -__a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vneg_s8 (int8x8_t __a)
{
  return -__a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vneg_s16 (int16x4_t __a)
{
  return -__a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vneg_s32 (int32x2_t __a)
{
  return -__a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vneg_s64 (int64x1_t __a)
{
  return -__a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vnegq_f32 (float32x4_t __a)
{
  return -__a;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vnegq_f64 (float64x2_t __a)
{
  return -__a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vnegq_s8 (int8x16_t __a)
{
  return -__a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vnegq_s16 (int16x8_t __a)
{
  return -__a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vnegq_s32 (int32x4_t __a)
{
  return -__a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vnegq_s64 (int64x2_t __a)
{
  return -__a;
}

/* vpadd  */

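/* Pairwise addition, with the same lane pairing as vpmax/vpmin above.
   The vpaddd forms reduce the two lanes of a 128-bit vector to a
   single scalar sum.  */
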
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_addpv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_addpv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_addpv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpaddd_f64 (float64x2_t __a)
{
  return __builtin_aarch64_reduc_plus_scal_v2df (__a);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vpaddd_s64 (int64x2_t __a)
{
  return __builtin_aarch64_addpdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vpaddd_u64 (uint64x2_t __a)
{
  return __builtin_aarch64_addpdi ((int64x2_t) __a);
}

/* vqabs */

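/* Saturating absolute value: like vabs, but the most negative input
   saturates to the type maximum instead of wrapping, e.g.
   vqabsb_s8 (-128) yields 127.  */
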
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqabsq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqabsb_s8 (int8_t __a)
{
  return (int8_t) __builtin_aarch64_sqabsqi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqabsh_s16 (int16_t __a)
{
  return (int16_t) __builtin_aarch64_sqabshi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqabss_s32 (int32_t __a)
{
  return (int32_t) __builtin_aarch64_sqabssi (__a);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqabsd_s64 (int64_t __a)
{
  return __builtin_aarch64_sqabsdi (__a);
}

/* vqadd */

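/* Saturating addition: results that would overflow clamp to the
   minimum or maximum of the element type rather than wrapping, e.g.
   vqaddb_s8 (127, 1) yields 127 and vqaddb_u8 (250, 10) yields 255.  */
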
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqaddb_s8 (int8_t __a, int8_t __b)
{
  return (int8_t) __builtin_aarch64_sqaddqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqaddh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqaddhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqadds_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqaddsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqaddd_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqadddi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqaddb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqaddh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqadds_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqaddd_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqadddi_uuu (__a, __b);
}

/* vqdmlal */

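/* Saturating doubling multiply-accumulate long: widen each product
   2 * __b[i] * __c[i] to twice the element width and add it to the
   accumulator __a with saturation.  The _high variants read the upper
   halves of their 128-bit sources, _lane/_laneq select one element of
   __c, and _n multiplies every element by a scalar.  */
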
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_s32 (int64_t __a, int32_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d);
}

/* vqdmlsl */

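/* Saturating doubling multiply-subtract long: as vqdmlal above, but
   the widened product 2 * __b[i] * __c[i] is subtracted from the
   accumulator __a with saturation.  */
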
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_s32 (int64_t __a, int32_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d);
}

/* vqdmulh */

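/* Saturating doubling multiply returning high half: each result lane
   is sat ((2 * __a[i] * __b[i]) >> esize), where esize is the element
   width in bits; the only saturating case is when both multiplicands
   are the most negative value of the element type.  */
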
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c);
}

/* vqdmull */

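/* Saturating doubling multiply long: each result lane is the
   full-width product sat (2 * __a[i] * __b[i]) at twice the source
   element width, e.g. vqdmullh_s16 (-32768, -32768) saturates to
   INT32_MAX.  */
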
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqdmullv4hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqdmull2v8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_n_s16 (int16x4_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqdmullv2si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqdmull2v4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_n_s32 (int32x2_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull_nv2si (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmullh_s16 (int16_t __a, int16_t __b)
{
  return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmullh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_laneqhi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmulls_s32 (int32_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmullsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmulls_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_laneqsi (__a, __b, __c);
}

/* vqmovn */

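/* Saturating narrow: each element is converted to the type of half
   the width, clamping values outside that type's range, e.g.
   vqmovnh_s16 (300) yields 127.  */
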
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqmovn_s16 (int16x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqmovn_s32 (int32x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqmovn_s64 (int64x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovn_u16 (uint16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovn_u32 (uint32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovn_u64 (uint64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqmovnh_s16 (int16_t __a)
{
  return (int8_t) __builtin_aarch64_sqmovnhi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqmovns_s32 (int32_t __a)
{
  return (int16_t) __builtin_aarch64_sqmovnsi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqmovnd_s64 (int64_t __a)
{
  return (int32_t) __builtin_aarch64_sqmovndi (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqmovnh_u16 (uint16_t __a)
{
  return (uint8_t) __builtin_aarch64_uqmovnhi (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqmovns_u32 (uint32_t __a)
{
  return (uint16_t) __builtin_aarch64_uqmovnsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqmovnd_u64 (uint64_t __a)
{
  return (uint32_t) __builtin_aarch64_uqmovndi (__a);
}

/* vqmovun */

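/* Saturating narrow, signed to unsigned: negative inputs clamp to 0
   and inputs above the unsigned maximum of the narrower type clamp to
   that maximum, e.g. vqmovunh_s16 (-5) yields 0.  */
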
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovun_s16 (int16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovun_s32 (int32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovun_s64 (int64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqmovunh_s16 (int16_t __a)
{
  return (int8_t) __builtin_aarch64_sqmovunhi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqmovuns_s32 (int32_t __a)
{
  return (int16_t) __builtin_aarch64_sqmovunsi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqmovund_s64 (int64_t __a)
{
  return (int32_t) __builtin_aarch64_sqmovundi (__a);
}

/* vqneg */

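/* Saturating negation: as vneg, except that the most negative input
   saturates to the type maximum, e.g. vqnegb_s8 (-128) yields 127.  */
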
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqnegq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqnegb_s8 (int8_t __a)
{
  return (int8_t) __builtin_aarch64_sqnegqi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqnegh_s16 (int16_t __a)
{
  return (int16_t) __builtin_aarch64_sqneghi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqnegs_s32 (int32_t __a)
{
  return (int32_t) __builtin_aarch64_sqnegsi (__a);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqnegd_s64 (int64_t __a)
{
  return __builtin_aarch64_sqnegdi (__a);
}

/* vqrdmulh */

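/* Saturating rounding doubling multiply returning high half: as
   vqdmulh, but a rounding constant is added before the high half is
   taken, i.e. sat ((2 * __a[i] * __b[i] + (1 << (esize - 1)))
   >> esize).  */
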
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c);
}

/* vqrshl */

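/* Saturating rounding shift left by a signed, per-element shift count
   in __b: positive counts shift left with saturation, negative counts
   shift right with rounding.  */
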
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqrshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqrshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqrshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_uqrshlv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_uqrshlv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_uqrshlv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqrshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqrshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqrshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqrshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_uqrshlv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_uqrshlv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_uqrshlv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_uqrshlv2di_uus (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshlb_s8 (int8_t __a, int8_t __b)
{
  return __builtin_aarch64_sqrshlqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshlh_s16 (int16_t __a, int16_t __b)
{
  return __builtin_aarch64_sqrshlhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshls_s32 (int32_t __a, int32_t __b)
{
  return __builtin_aarch64_sqrshlsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqrshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqrshlb_u8 (uint8_t __a, uint8_t __b)
{
  return __builtin_aarch64_uqrshlqi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqrshlh_u16 (uint16_t __a, uint16_t __b)
{
  return __builtin_aarch64_uqrshlhi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqrshls_u32 (uint32_t __a, uint32_t __b)
{
  return __builtin_aarch64_uqrshlsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqrshld_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqrshldi_uus (__a, __b);
}

/* vqrshrn */

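/* Saturating rounding shift right narrow by the immediate __b: each
   element is shifted right with rounding and the result saturated to
   the type of half the width.  */
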
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nv8hi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nv4si_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nv2di_uus (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshrnh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshrns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshrnd_n_s64 (int64_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqrshrnh_n_u16 (uint16_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqrshrns_n_u32 (uint32_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqrshrnd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b);
}

/* vqrshrun */

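/* Saturating rounding shift right unsigned narrow: as vqrshrn, but
   the signed input is saturated to the unsigned type of half the
   width, so negative results clamp to 0.  */
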
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshrunh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshruns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshrund_n_s64 (int64_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
}

/* vqshl */

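/* Saturating shift left.  The register forms take a signed,
   per-element shift count in __b (negative counts shift right,
   truncating); the _n forms shift every element left by an immediate
   in the range 0 to esize - 1.  Results saturate on overflow.  */
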
20051 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_s8(int8x8_t __a,int8x8_t __b)20052 vqshl_s8 (int8x8_t __a, int8x8_t __b)
20053 {
20054   return __builtin_aarch64_sqshlv8qi (__a, __b);
20055 }
20056 
20057 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_s16(int16x4_t __a,int16x4_t __b)20058 vqshl_s16 (int16x4_t __a, int16x4_t __b)
20059 {
20060   return __builtin_aarch64_sqshlv4hi (__a, __b);
20061 }
20062 
20063 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_s32(int32x2_t __a,int32x2_t __b)20064 vqshl_s32 (int32x2_t __a, int32x2_t __b)
20065 {
20066   return __builtin_aarch64_sqshlv2si (__a, __b);
20067 }
20068 
20069 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_s64(int64x1_t __a,int64x1_t __b)20070 vqshl_s64 (int64x1_t __a, int64x1_t __b)
20071 {
20072   return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])};
20073 }
20074 
20075 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_u8(uint8x8_t __a,int8x8_t __b)20076 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
20077 {
20078   return __builtin_aarch64_uqshlv8qi_uus ( __a, __b);
20079 }
20080 
20081 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_u16(uint16x4_t __a,int16x4_t __b)20082 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
20083 {
20084   return __builtin_aarch64_uqshlv4hi_uus ( __a, __b);
20085 }
20086 
20087 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_u32(uint32x2_t __a,int32x2_t __b)20088 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
20089 {
20090   return __builtin_aarch64_uqshlv2si_uus ( __a, __b);
20091 }
20092 
20093 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_u64(uint64x1_t __a,int64x1_t __b)20094 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
20095 {
20096   return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])};
20097 }
20098 
20099 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_s8(int8x16_t __a,int8x16_t __b)20100 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
20101 {
20102   return __builtin_aarch64_sqshlv16qi (__a, __b);
20103 }
20104 
20105 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_s16(int16x8_t __a,int16x8_t __b)20106 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
20107 {
20108   return __builtin_aarch64_sqshlv8hi (__a, __b);
20109 }
20110 
20111 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_s32(int32x4_t __a,int32x4_t __b)20112 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
20113 {
20114   return __builtin_aarch64_sqshlv4si (__a, __b);
20115 }
20116 
20117 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_s64(int64x2_t __a,int64x2_t __b)20118 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
20119 {
20120   return __builtin_aarch64_sqshlv2di (__a, __b);
20121 }
20122 
20123 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_u8(uint8x16_t __a,int8x16_t __b)20124 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
20125 {
20126   return __builtin_aarch64_uqshlv16qi_uus ( __a, __b);
20127 }
20128 
20129 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_u16(uint16x8_t __a,int16x8_t __b)20130 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
20131 {
20132   return __builtin_aarch64_uqshlv8hi_uus ( __a, __b);
20133 }
20134 
20135 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_u32(uint32x4_t __a,int32x4_t __b)20136 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
20137 {
20138   return __builtin_aarch64_uqshlv4si_uus ( __a, __b);
20139 }
20140 
20141 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_u64(uint64x2_t __a,int64x2_t __b)20142 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
20143 {
20144   return __builtin_aarch64_uqshlv2di_uus ( __a, __b);
20145 }
20146 
20147 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshlb_s8(int8_t __a,int8_t __b)20148 vqshlb_s8 (int8_t __a, int8_t __b)
20149 {
20150   return __builtin_aarch64_sqshlqi (__a, __b);
20151 }
20152 
20153 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshlh_s16(int16_t __a,int16_t __b)20154 vqshlh_s16 (int16_t __a, int16_t __b)
20155 {
20156   return __builtin_aarch64_sqshlhi (__a, __b);
20157 }
20158 
20159 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshls_s32(int32_t __a,int32_t __b)20160 vqshls_s32 (int32_t __a, int32_t __b)
20161 {
20162   return __builtin_aarch64_sqshlsi (__a, __b);
20163 }
20164 
20165 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqshld_s64(int64_t __a,int64_t __b)20166 vqshld_s64 (int64_t __a, int64_t __b)
20167 {
20168   return __builtin_aarch64_sqshldi (__a, __b);
20169 }
20170 
20171 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshlb_u8(uint8_t __a,uint8_t __b)20172 vqshlb_u8 (uint8_t __a, uint8_t __b)
20173 {
20174   return __builtin_aarch64_uqshlqi_uus (__a, __b);
20175 }
20176 
20177 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshlh_u16(uint16_t __a,uint16_t __b)20178 vqshlh_u16 (uint16_t __a, uint16_t __b)
20179 {
20180   return __builtin_aarch64_uqshlhi_uus (__a, __b);
20181 }
20182 
20183 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshls_u32(uint32_t __a,uint32_t __b)20184 vqshls_u32 (uint32_t __a, uint32_t __b)
20185 {
20186   return __builtin_aarch64_uqshlsi_uus (__a, __b);
20187 }
20188 
20189 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqshld_u64(uint64_t __a,uint64_t __b)20190 vqshld_u64 (uint64_t __a, uint64_t __b)
20191 {
20192   return __builtin_aarch64_uqshldi_uus (__a, __b);
20193 }
20194 
20195 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_n_s8(int8x8_t __a,const int __b)20196 vqshl_n_s8 (int8x8_t __a, const int __b)
20197 {
20198   return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
20199 }
20200 
20201 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_n_s16(int16x4_t __a,const int __b)20202 vqshl_n_s16 (int16x4_t __a, const int __b)
20203 {
20204   return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
20205 }
20206 
20207 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_n_s32(int32x2_t __a,const int __b)20208 vqshl_n_s32 (int32x2_t __a, const int __b)
20209 {
20210   return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
20211 }
20212 
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_n_u8 (uint8x8_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_n_u16 (uint16x4_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_n_u32 (uint32x2_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv2di_uus (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshlb_n_s8 (int8_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshlh_n_s16 (int16_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshls_n_s32 (int32_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqshld_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_sqshl_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshlb_n_u8 (uint8_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nqi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshlh_n_u16 (uint16_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nhi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshls_n_u32 (uint32_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqshld_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
}

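/* A minimal illustrative sketch of the vqshl_n_* forms above (not part
   of this header): each lane is shifted left by the immediate and
   saturated to the lane type's range rather than wrapping.

     uint8x8_t __x = vdup_n_u8 (200);
     uint8x8_t __y = vqshl_n_u8 (__x, 1);

   Every lane of __y is 255 (400 saturated), not the wrapped 144.  */
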
/* vqshlu */

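/* The vqshlu_n_* forms below are the signed-to-unsigned variant: a
   signed input is shifted left by the immediate and saturated to the
   corresponding unsigned range, so negative lanes clamp to 0.
   Illustrative sketch only:

     int8x8_t __s = vdup_n_s8 (-1);
     uint8x8_t __u = vqshlu_n_s8 (__s, 3);

   Every lane of __u is 0 rather than the bit pattern 248.  */
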
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshlu_n_s8 (int8x8_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshlu_n_s16 (int16x4_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshlu_n_s32 (int32x2_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshlu_n_s64 (int64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshluq_n_s8 (int8x16_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshluq_n_s16 (int16x8_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshluq_n_s32 (int32x4_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshluq_n_s64 (int64x2_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshlub_n_s8 (int8_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshluh_n_s16 (int16_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshlus_n_s32 (int32_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqshlud_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
}

/* vqshrn */

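/* The vqshrn_n_* forms below shift each lane right by the immediate
   and narrow to the half-width element type with saturation.
   Illustrative sketch only:

     int16x8_t __w = vdupq_n_s16 (1000);
     int8x8_t __n = vqshrn_n_s16 (__w, 2);

   1000 >> 2 is 250, which saturates to 127 in each int8 lane.  */
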
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nv8hi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nv4si_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nv2di_uus (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshrnh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshrns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshrnd_n_s64 (int64_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshrnh_n_u16 (uint16_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nhi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshrns_n_u32 (uint32_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nsi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshrnd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_ndi_uus (__a, __b);
}

/* vqshrun */

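/* vqshrun_n_* is the signed-to-unsigned form of the narrowing shift:
   negative results clamp to 0 and results above the unsigned maximum
   clamp to it.  Illustrative sketch only:

     int16x8_t __w = vdupq_n_s16 (-42);
     uint8x8_t __n = vqshrun_n_s16 (__w, 1);

   Every lane of __n clamps to 0.  */
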
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshrunh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshruns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshrund_n_s64 (int64_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
}

/* vqsub */

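/* The scalar vqsub forms below subtract with saturation instead of
   the usual modular wrap-around.  Illustrative sketch only:

     uint8_t __r = vqsubb_u8 (10, 20);

   __r is 0, not the wrapped value 246.  */
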
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqsubb_s8 (int8_t __a, int8_t __b)
{
  return (int8_t) __builtin_aarch64_sqsubqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqsubh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqsubhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqsubs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqsubsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqsubd_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqsubdi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqsubb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqsubh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqsubs_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqsubd_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqsubdi_uuu (__a, __b);
}

/* vqtbl2 */

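/* The vqtbl2 intrinsics below look bytes up in a 32-byte table held in
   a pair of 128-bit registers; an index selects one table byte, and
   out-of-range indices (32 and above) yield 0.  Illustrative sketch
   only, with made-up table contents:

     int8x16x2_t __tab = { { vdupq_n_s8 (7), vdupq_n_s8 (9) } };
     uint8x8_t __idx = vdup_n_u8 (16);
     int8x8_t __r = vqtbl2_s8 (__tab, __idx);

   Byte 16 lives in the second table register, so every lane is 9.  */
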
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1);
  return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx);
}

/* vqtbl3 */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx);
}

/* vqtbl4 */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx);
}


/* vqtbx2 */
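/* The vqtbx forms below are the extending variants of vqtbl: lanes
   whose index is out of range keep the corresponding byte of the
   first operand r instead of being zeroed (editorial note).  */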
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1);
  return __builtin_aarch64_tbx4v8qi (r, __o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o,
						(int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o,
						(int8x8_t)idx);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1);
  return __builtin_aarch64_tbx4v16qi (r, __o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o,
						  (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o,
						  (int8x16_t)idx);
}

/* vqtbx3 */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2);
  return __builtin_aarch64_qtbx3v8qi (r, __o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o,
						 (int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o,
						 (int8x8_t)idx);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2);
  return __builtin_aarch64_qtbx3v16qi (r, __o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o,
						   (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o,
						   (int8x16_t)idx);
}

/* vqtbx4 */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3);
  return __builtin_aarch64_qtbx4v8qi (r, __o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o,
						 (int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o,
						 (int8x8_t)idx);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3);
  return __builtin_aarch64_qtbx4v16qi (r, __o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o,
						   (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o,
						   (int8x16_t)idx);
}

/* vrbit  */

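/* vrbit reverses the order of the bits within each byte lane.
   Illustrative sketch only:

     uint8x8_t __x = vdup_n_u8 (0x01);
     uint8x8_t __y = vrbit_u8 (__x);

   Every lane of __y is 0x80.  */
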
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrbit_p8 (poly8x8_t __a)
{
  return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrbit_s8 (int8x8_t __a)
{
  return __builtin_aarch64_rbitv8qi (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrbit_u8 (uint8x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrbitq_p8 (poly8x16_t __a)
{
  return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrbitq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_rbitv16qi (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrbitq_u8 (uint8x16_t __a)
{
  return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a);
}

/* vrecpe  */

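/* vrecpe only gives a low-precision reciprocal estimate; the vrecps
   step defined further below refines it by Newton-Raphson, since
   FRECPS computes 2 - __a * __b.  A minimal sketch (illustrative
   only):

     float32x2_t __a = vdup_n_f32 (3.0f);
     float32x2_t __x = vrecpe_f32 (__a);
     __x = vmul_f32 (__x, vrecps_f32 (__a, __x));
     __x = vmul_f32 (__x, vrecps_f32 (__a, __x));

   Each vmul/vrecps pair roughly doubles the number of correct bits in
   the approximation of 1/3.  */
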
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrecpe_u32 (uint32x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_urecpev2si ((int32x2_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrecpeq_u32 (uint32x4_t __a)
{
  return (uint32x4_t) __builtin_aarch64_urecpev4si ((int32x4_t) __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpes_f32 (float32_t __a)
{
  return __builtin_aarch64_frecpesf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecped_f64 (float64_t __a)
{
  return __builtin_aarch64_frecpedf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecpe_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frecpev2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpeq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frecpev4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpeq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frecpev2df (__a);
}

/* vrecps  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpss_f32 (float32_t __a, float32_t __b)
{
  return __builtin_aarch64_frecpssf (__a, __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpsd_f64 (float64_t __a, float64_t __b)
{
  return __builtin_aarch64_frecpsdf (__a, __b);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecps_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_frecpsv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_frecpsv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_frecpsv2df (__a, __b);
}

/* vrecpx  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpxs_f32 (float32_t __a)
{
  return __builtin_aarch64_frecpxsf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpxd_f64 (float64_t __a)
{
  return __builtin_aarch64_frecpxdf (__a);
}


/* vrev  */

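/* The vrev{16,32,64} family below reverses the order of the elements
   within each 16-, 32- or 64-bit chunk of the vector; the chunks
   themselves stay in place.  Illustrative sketch only:

     uint8x8_t __x = { 0, 1, 2, 3, 4, 5, 6, 7 };
     uint8x8_t __y = vrev32_u8 (__x);

   __y is { 3, 2, 1, 0, 7, 6, 5, 4 }.  */
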
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrev16_p8 (poly8x8_t a)
{
  return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrev16_s8 (int8x8_t a)
{
  return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrev16_u8 (uint8x8_t a)
{
  return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrev16q_p8 (poly8x16_t a)
{
  return __builtin_shuffle (a,
      (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrev16q_s8 (int8x16_t a)
{
  return __builtin_shuffle (a,
      (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrev16q_u8 (uint8x16_t a)
{
  return __builtin_shuffle (a,
      (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrev32_p8 (poly8x8_t a)
{
  return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vrev32_p16 (poly16x4_t a)
{
  return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrev32_s8 (int8x8_t a)
{
  return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrev32_s16 (int16x4_t a)
{
  return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrev32_u8 (uint8x8_t a)
{
  return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrev32_u16 (uint16x4_t a)
{
  return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrev32q_p8 (poly8x16_t a)
{
  return __builtin_shuffle (a,
      (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vrev32q_p16 (poly16x8_t a)
{
  return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrev32q_s8 (int8x16_t a)
{
  return __builtin_shuffle (a,
      (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrev32q_s16 (int16x8_t a)
{
  return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrev32q_u8 (uint8x16_t a)
{
  return __builtin_shuffle (a,
      (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrev32q_u16 (uint16x8_t a)
{
  return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrev64_f32 (float32x2_t a)
{
  return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrev64_p8 (poly8x8_t a)
{
  return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vrev64_p16 (poly16x4_t a)
{
  return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrev64_s8 (int8x8_t a)
{
  return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrev64_s16 (int16x4_t a)
{
  return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrev64_s32 (int32x2_t a)
{
  return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrev64_u8 (uint8x8_t a)
{
  return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrev64_u16 (uint16x4_t a)
{
  return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrev64_u32 (uint32x2_t a)
{
  return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrev64q_f32 (float32x4_t a)
{
  return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrev64q_p8 (poly8x16_t a)
{
  return __builtin_shuffle (a,
      (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vrev64q_p16 (poly16x8_t a)
{
  return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrev64q_s8 (int8x16_t a)
{
  return __builtin_shuffle (a,
      (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrev64q_s16 (int16x8_t a)
{
  return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrev64q_s32 (int32x4_t a)
{
  return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrev64q_u8 (uint8x16_t a)
{
  return __builtin_shuffle (a,
      (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrev64q_u16 (uint16x8_t a)
{
  return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrev64q_u32 (uint32x4_t a)
{
  return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
}

/* vrnd  */

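/* The vrnd* families below map onto the FRINT* instructions: vrnd
   truncates toward zero, vrnda rounds to nearest with ties away from
   zero, vrndi rounds using the current mode without raising inexact,
   vrndm rounds toward minus infinity, vrndn rounds to nearest with
   ties to even, vrndp rounds toward plus infinity, and vrndx rounds
   using the current mode and may raise inexact.  Illustrative sketch
   only:

     float32x2_t __v = vdup_n_f32 (2.5f);
     float32x2_t __t = vrnd_f32 (__v);     each lane 2.0 (truncate)
     float32x2_t __n = vrndn_f32 (__v);    each lane 2.0 (ties to even)
     float32x2_t __r = vrnda_f32 (__v);    each lane 3.0 (ties away)  */
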
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnd_f32 (float32x2_t __a)
{
  return __builtin_aarch64_btruncv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrnd_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_btruncv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_btruncv2df (__a);
}

/* vrnda  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnda_f32 (float32x2_t __a)
{
  return __builtin_aarch64_roundv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrnda_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndaq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_roundv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndaq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_roundv2df (__a);
}

/* vrndi  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndi_f32 (float32x2_t __a)
{
  return __builtin_aarch64_nearbyintv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndi_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndiq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_nearbyintv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndiq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_nearbyintv2df (__a);
}

/* vrndm  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndm_f32 (float32x2_t __a)
{
  return __builtin_aarch64_floorv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndm_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndmq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_floorv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndmq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_floorv2df (__a);
}

/* vrndn  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndn_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frintnv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndn_f64 (float64x1_t __a)
{
  return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])};
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndnq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frintnv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndnq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frintnv2df (__a);
}

/* vrndp  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndp_f32 (float32x2_t __a)
{
  return __builtin_aarch64_ceilv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndp_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndpq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_ceilv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndpq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_ceilv2df (__a);
}

/* vrndx  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndx_f32 (float32x2_t __a)
{
  return __builtin_aarch64_rintv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndx_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndxq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_rintv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndxq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_rintv2df (__a);
}

/* vrshl */

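/* vrshl shifts each lane of the first operand by the signed, per-lane
   count in the second; negative counts shift right, and right shifts
   round instead of truncating.  Illustrative sketch only:

     int8x8_t __x = vdup_n_s8 (3);
     int8x8_t __c = vdup_n_s8 (-1);
     int8x8_t __r = vrshl_s8 (__x, __c);

   3 shifted right by 1 with rounding gives 2 in every lane, where a
   plain truncating shift would give 1.  */
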
21498 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshl_s8(int8x8_t __a,int8x8_t __b)21499 vrshl_s8 (int8x8_t __a, int8x8_t __b)
21500 {
21501   return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
21502 }
21503 
21504 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshl_s16(int16x4_t __a,int16x4_t __b)21505 vrshl_s16 (int16x4_t __a, int16x4_t __b)
21506 {
21507   return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
21508 }
21509 
21510 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshl_s32(int32x2_t __a,int32x2_t __b)21511 vrshl_s32 (int32x2_t __a, int32x2_t __b)
21512 {
21513   return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
21514 }
21515 
21516 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_urshlv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_urshlv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_urshlv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_urshlv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_urshlv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_urshlv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_urshlv2di_uus (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vrshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_srshldi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vrshld_u64 (uint64_t __a, int64_t __b)
{
  return __builtin_aarch64_urshldi_uus (__a, __b);
}

/* vrshr */
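
/* Rounding shift right by an immediate: each lane of the result is
   (lane + (1 << (n - 1))) >> n, i.e. a right shift that rounds to
   nearest, with halves rounded up.  The shift amount must be a
   constant in the range 1 ... <element width in bits>.  */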

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshr_n_u8 (uint8x8_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshr_n_u16 (uint16x4_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshr_n_u32 (uint32x2_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv2di_uus (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vrshrd_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_srshr_ndi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vrshrd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_urshr_ndi_uus (__a, __b);
}

/* vrsra */
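
/* Rounding shift right and accumulate: vrsra_n (__a, __b, __c) adds
   the rounding shift right of each lane of __b (as for vrshr_n) to
   the corresponding lane of __a.  */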

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_srsra_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_srsra_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
}

#pragma GCC push_options
#pragma GCC target ("+nothing+crypto")

/* vsha1 */
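
/* SHA-1 hash-update intrinsics (the choose, majority and parity
   rounds), the fixed-rotate helper vsha1h_u32, and the SHA-1/SHA-256
   schedule-update steps.  These require the crypto extension enabled
   by the target pragma above.  */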

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vsha1h_u32 (uint32_t hash_e)
{
  return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
{
  return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
{
  return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
}

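/* 64x64 -> 128-bit carry-less (polynomial) multiply, also part of the
   crypto extension; vmull_high_p64 multiplies the high halves of its
   two-lane operands.  */
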
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vmull_p64 (poly64_t a, poly64_t b)
{
  return __builtin_aarch64_crypto_pmulldi_ppp (a, b);
}

__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vmull_high_p64 (poly64x2_t a, poly64x2_t b)
{
  return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
}

#pragma GCC pop_options

/* vshl */
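
/* Shift left.  The _n forms shift every lane left by a constant; the
   register forms shift each lane of the first operand by the signed
   count in the corresponding lane of the second operand, where a
   negative count shifts right.  */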

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshld_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_ashldi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshld_n_u64 (uint64_t __a, const int __b)
{
  return (uint64_t) __builtin_aarch64_ashldi (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_ushlv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_ushlv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_ushlv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_ushlv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_ushlv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_ushlv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_ushlv2di_uus (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sshldi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshld_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_ushldi_uus (__a, __b);
}

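/* vshll: widening shift left long of the low (or, for the _high
   forms, upper) half of the input, doubling the element width.  */
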
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_high_n_s8 (int8x16_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_high_n_s16 (int16x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_high_n_s32 (int32x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_high_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_high_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_high_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_n_s8 (int8x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_n_s16 (int16x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_n_s32 (int32x2_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_n_u8 (uint8x8_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv8qi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_n_u16 (uint16x4_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv4hi_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_n_u32 (uint32x2_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv2si_uus (__a, __b);
}

/* vshr */
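
/* Shift right by an immediate: arithmetic for the signed types,
   logical (zero-filling) for the unsigned ones.  */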

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshr_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshr_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshr_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus (__a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshrd_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_ashr_simddi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshrd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_lshr_simddi_uus (__a, __b);
}

/* vsli */
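
/* Shift left and insert: vsli_n (__a, __b, __c) shifts each lane of
   __b left by __c and inserts the result into __a, leaving the __c
   least significant bits of each lane of __a unchanged.  */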

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vslid_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_ssli_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
}

/* vsqadd */
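
/* Unsigned saturating accumulate of a signed value: each signed lane
   of the second operand is added to the corresponding unsigned lane
   of the first, and the result is saturated to the unsigned range.  */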

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_usqaddv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_usqaddv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_usqaddv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_usqaddv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_usqaddv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_usqaddv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_usqaddv2di_uus (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vsqaddb_u8 (uint8_t __a, int8_t __b)
{
  return __builtin_aarch64_usqaddqi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vsqaddh_u16 (uint16_t __a, int16_t __b)
{
  return __builtin_aarch64_usqaddhi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vsqadds_u32 (uint32_t __a, int32_t __b)
{
  return __builtin_aarch64_usqaddsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsqaddd_u64 (uint64_t __a, int64_t __b)
{
  return __builtin_aarch64_usqadddi_uus (__a, __b);
}

/* vsqrt */
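/* Element-wise floating-point square root.  */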
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vsqrt_f32 (float32x2_t a)
{
  return __builtin_aarch64_sqrtv2sf (a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsqrtq_f32 (float32x4_t a)
{
  return __builtin_aarch64_sqrtv4sf (a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vsqrt_f64 (float64x1_t a)
{
  return (float64x1_t) { __builtin_aarch64_sqrtdf (a[0]) };
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vsqrtq_f64 (float64x2_t a)
{
  return __builtin_aarch64_sqrtv2df (a);
}

/* vsra */
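
/* Shift right and accumulate: vsra_n (__a, __b, __c) adds each lane
   of __b shifted right by __c (truncating, unlike vrsra_n) to the
   corresponding lane of __a.  */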

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_ssra_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
}

/* vsri */
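
/* Shift right and insert: vsri_n (__a, __b, __c) shifts each lane of
   __b right by __c and inserts the result into __a, leaving the __c
   most significant bits of each lane of __a unchanged.  */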

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vsrid_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_ssri_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
}

/* vst1 */
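
/* Store a single 64-bit vector to memory; no alignment is required
   beyond that of the element type.  */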

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f16 (float16_t *__a, float16x4_t __b)
{
  __builtin_aarch64_st1v4hf (__a, __b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f32 (float32_t *a, float32x2_t b)
{
  __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f64 (float64_t *a, float64x1_t b)
{
  *a = b[0];
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_p8 (poly8_t *a, poly8x8_t b)
{
  __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
			     (int8x8_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_p16 (poly16_t *a, poly16x4_t b)
{
  __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
			     (int16x4_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s8 (int8_t *a, int8x8_t b)
{
  __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s16 (int16_t *a, int16x4_t b)
{
  __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s32 (int32_t *a, int32x2_t b)
{
  __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s64 (int64_t *a, int64x1_t b)
{
  *a = b[0];
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u8 (uint8_t *a, uint8x8_t b)
{
  __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
			     (int8x8_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u16 (uint16_t *a, uint16x4_t b)
{
  __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
			     (int16x4_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u32 (uint32_t *a, uint32x2_t b)
{
  __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
			     (int32x2_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u64 (uint64_t *a, uint64x1_t b)
{
  *a = b[0];
}

/* vst1q */
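
/* Store a single 128-bit vector to memory.  */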
22821 
22822 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f16(float16_t * __a,float16x8_t __b)22823 vst1q_f16 (float16_t *__a, float16x8_t __b)
22824 {
22825   __builtin_aarch64_st1v8hf (__a, __b);
22826 }
22827 
22828 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f32(float32_t * a,float32x4_t b)22829 vst1q_f32 (float32_t *a, float32x4_t b)
22830 {
22831   __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
22832 }
22833 
22834 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f64(float64_t * a,float64x2_t b)22835 vst1q_f64 (float64_t *a, float64x2_t b)
22836 {
22837   __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
22838 }
22839 
22840 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_p8(poly8_t * a,poly8x16_t b)22841 vst1q_p8 (poly8_t *a, poly8x16_t b)
22842 {
22843   __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
22844 			      (int8x16_t) b);
22845 }
22846 
22847 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_p16(poly16_t * a,poly16x8_t b)22848 vst1q_p16 (poly16_t *a, poly16x8_t b)
22849 {
22850   __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
22851 			     (int16x8_t) b);
22852 }
22853 
22854 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s8(int8_t * a,int8x16_t b)22855 vst1q_s8 (int8_t *a, int8x16_t b)
22856 {
22857   __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
22858 }
22859 
22860 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s16(int16_t * a,int16x8_t b)22861 vst1q_s16 (int16_t *a, int16x8_t b)
22862 {
22863   __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
22864 }
22865 
22866 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s32(int32_t * a,int32x4_t b)22867 vst1q_s32 (int32_t *a, int32x4_t b)
22868 {
22869   __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
22870 }
22871 
22872 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s64(int64_t * a,int64x2_t b)22873 vst1q_s64 (int64_t *a, int64x2_t b)
22874 {
22875   __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
22876 }
22877 
22878 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u8(uint8_t * a,uint8x16_t b)22879 vst1q_u8 (uint8_t *a, uint8x16_t b)
22880 {
22881   __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
22882 			      (int8x16_t) b);
22883 }
22884 
22885 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u16(uint16_t * a,uint16x8_t b)22886 vst1q_u16 (uint16_t *a, uint16x8_t b)
22887 {
22888   __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
22889 			     (int16x8_t) b);
22890 }
22891 
22892 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u32(uint32_t * a,uint32x4_t b)22893 vst1q_u32 (uint32_t *a, uint32x4_t b)
22894 {
22895   __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
22896 			     (int32x4_t) b);
22897 }
22898 
22899 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u64(uint64_t * a,uint64x2_t b)22900 vst1q_u64 (uint64_t *a, uint64x2_t b)
22901 {
22902   __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
22903 			     (int64x2_t) b);
22904 }
22905 
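/* A minimal usage sketch (not part of the original header), showing the
   128-bit Q-register form of the same store.  The buffer name, helper name
   and constant are illustrative assumptions.  */
#if 0
static void
__example_vst1q (void)
{
  float32_t __buf[4];
  float32x4_t __v = vdupq_n_f32 (1.5f);	/* four float lanes, all 1.5 */
  vst1q_f32 (__buf, __v);		/* __buf now holds {1.5, 1.5, 1.5, 1.5} */
}
#endif
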
/* vst1_lane */

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_f16 (float16_t *__a, float16x4_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

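/* A minimal usage sketch (not part of the original header): vst1_lane
   stores a single lane, so the destination is one scalar rather than a
   whole vector.  The names and the lane index are illustrative
   assumptions.  */
#if 0
static void
__example_vst1_lane (void)
{
  int16_t __out;
  int16x4_t __v = vdup_n_s16 (42);
  vst1_lane_s16 (&__out, __v, 2);	/* store lane 2 only; __out == 42 */
}
#endif
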
/* vst1q_lane */

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_f16 (float16_t *__a, float16x8_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_u16 (uint16_t *__a, uint16x8_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane)
{
  *__a = __aarch64_vget_lane_any (__b, __lane);
}

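/* A minimal usage sketch (not part of the original header) for the
   Q-register lane store; the lane index must be a constant in range for
   the vector type (0-15 for uint8x16_t).  Names are illustrative
   assumptions.  */
#if 0
static void
__example_vst1q_lane (void)
{
  uint8_t __out;
  uint8x16_t __v = vdupq_n_u8 (0xff);
  vst1q_lane_u8 (&__out, __v, 15);	/* store the highest lane */
}
#endif
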
/* vstn */

__extension__ static __inline void
vst2_s64 (int64_t * __a, int64x1x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  int64x2x2_t temp;
  temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
  __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void
vst2_u64 (uint64_t * __a, uint64x1x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  uint64x2x2_t temp;
  temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
  __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void
vst2_f64 (float64_t * __a, float64x1x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  float64x2x2_t temp;
  temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
  __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
}

__extension__ static __inline void
vst2_s8 (int8_t * __a, int8x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_p8 (poly8_t * __a, poly8x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_s16 (int16_t * __a, int16x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  int16x8x2_t temp;
  temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_p16 (poly16_t * __a, poly16x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  poly16x8x2_t temp;
  temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_s32 (int32_t * __a, int32x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  int32x4x2_t temp;
  temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
  __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_u8 (uint8_t * __a, uint8x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_u16 (uint16_t * __a, uint16x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  uint16x8x2_t temp;
  temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_u32 (uint32_t * __a, uint32x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  uint32x4x2_t temp;
  temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
  __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f16 (float16_t * __a, float16x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  float16x8x2_t temp;
  temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[1], 1);
  __builtin_aarch64_st2v4hf (__a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f32 (float32_t * __a, float32x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  float32x4x2_t temp;
  temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
  __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_s8 (int8_t * __a, int8x16x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
  __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
  __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_s16 (int16_t * __a, int16x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
  __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
  __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_s32 (int32_t * __a, int32x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
  __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_s64 (int64_t * __a, int64x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
  __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
  __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
  __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
  __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
  __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f16 (float16_t * __a, float16x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[1], 1);
  __builtin_aarch64_st2v8hf (__a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f32 (float32_t * __a, float32x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
  __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f64 (float64_t * __a, float64x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
  __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
}

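/* A minimal usage sketch (not part of the original header): vst2 performs
   an interleaved store, alternating elements of val[0] and val[1] in
   memory.  The buffer name and constants are illustrative assumptions.  */
#if 0
static void
__example_vst2 (void)
{
  int32_t __buf[4];
  int32x2x2_t __pair;
  __pair.val[0] = vdup_n_s32 (1);
  __pair.val[1] = vdup_n_s32 (2);
  vst2_s32 (__buf, __pair);	/* interleaved: __buf == {1, 2, 1, 2} */
}
#endif
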
__extension__ static __inline void
vst3_s64 (int64_t * __a, int64x1x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  int64x2x3_t temp;
  temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
  __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void
vst3_u64 (uint64_t * __a, uint64x1x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  uint64x2x3_t temp;
  temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
  __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void
vst3_f64 (float64_t * __a, float64x1x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  float64x2x3_t temp;
  temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
  __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
}

__extension__ static __inline void
vst3_s8 (int8_t * __a, int8x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  int8x16x3_t temp;
  temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
  __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_p8 (poly8_t * __a, poly8x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  poly8x16x3_t temp;
  temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
  __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_s16 (int16_t * __a, int16x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  int16x8x3_t temp;
  temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
  __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_p16 (poly16_t * __a, poly16x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  poly16x8x3_t temp;
  temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
  __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_s32 (int32_t * __a, int32x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  int32x4x3_t temp;
  temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
  __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_u8 (uint8_t * __a, uint8x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  uint8x16x3_t temp;
  temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
  __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_u16 (uint16_t * __a, uint16x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  uint16x8x3_t temp;
  temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
  __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_u32 (uint32_t * __a, uint32x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  uint32x4x3_t temp;
  temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
  __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f16 (float16_t * __a, float16x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  float16x8x3_t temp;
  temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[2], 2);
  __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f32 (float32_t * __a, float32x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  float32x4x3_t temp;
  temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
  __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s8 (int8_t * __a, int8x16x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s16 (int16_t * __a, int16x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s32 (int32_t * __a, int32x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
  __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s64 (int64_t * __a, int64x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
  __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
  __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
  __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f16 (float16_t * __a, float16x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[2], 2);
  __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f32 (float32_t * __a, float32x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
  __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f64 (float64_t * __a, float64x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
  __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
}

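/* A minimal usage sketch (not part of the original header): a classic use
   of vst3 is re-interleaving planar R/G/B data into packed 3-byte pixels.
   Names and constants are illustrative assumptions.  */
#if 0
static void
__example_vst3 (void)
{
  uint8_t __rgb[24];		/* 8 packed 3-byte pixels */
  uint8x8x3_t __c;
  __c.val[0] = vdup_n_u8 (10);	/* R plane */
  __c.val[1] = vdup_n_u8 (20);	/* G plane */
  __c.val[2] = vdup_n_u8 (30);	/* B plane */
  vst3_u8 (__rgb, __c);		/* __rgb == {10,20,30, 10,20,30, ...} */
}
#endif
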
__extension__ static __inline void
vst4_s64 (int64_t * __a, int64x1x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  int64x2x4_t temp;
  temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
  temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
  __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void
vst4_u64 (uint64_t * __a, uint64x1x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint64x2x4_t temp;
  temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
  __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void
vst4_f64 (float64_t * __a, float64x1x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  float64x2x4_t temp;
  temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
  __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
}

__extension__ static __inline void
vst4_s8 (int8_t * __a, int8x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  int8x16x4_t temp;
  temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
  temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
  __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_p8 (poly8_t * __a, poly8x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  poly8x16x4_t temp;
  temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
  __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_s16 (int16_t * __a, int16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  int16x8x4_t temp;
  temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_p16 (poly16_t * __a, poly16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  poly16x8x4_t temp;
  temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_s32 (int32_t * __a, int32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  int32x4x4_t temp;
  temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u8 (uint8_t * __a, uint8x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint8x16x4_t temp;
  temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
  __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u16 (uint16_t * __a, uint16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint16x8x4_t temp;
  temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u32 (uint32_t * __a, uint32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint32x4x4_t temp;
  temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f16 (float16_t * __a, float16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  float16x8x4_t temp;
  temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_f16 (val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  float32x4x4_t temp;
  temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}

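/* A minimal usage sketch (not part of the original header): vst4
   interleaves four source vectors, writing one element of each in turn.
   Names and constants are illustrative assumptions.  */
#if 0
static void
__example_vst4 (void)
{
  float32_t __buf[8];
  float32x2x4_t __q;
  __q.val[0] = vdup_n_f32 (0.0f);
  __q.val[1] = vdup_n_f32 (1.0f);
  __q.val[2] = vdup_n_f32 (2.0f);
  __q.val[3] = vdup_n_f32 (3.0f);
  vst4_f32 (__buf, __q);	/* __buf == {0, 1, 2, 3, 0, 1, 2, 3} */
}
#endif
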
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s8 (int8_t * __a, int8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s16 (int16_t * __a, int16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s32 (int32_t * __a, int32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s64 (int64_t * __a, int64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f16 (float16_t * __a, float16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
23980 }
23981 
23982 __extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32(float32_t * __a,float32x4x4_t val)23983 vst4q_f32 (float32_t * __a, float32x4x4_t val)
23984 {
23985   __builtin_aarch64_simd_xi __o;
23986   __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
23987   __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
23988   __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
23989   __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
23990   __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
23991 }
23992 
23993 __extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f64(float64_t * __a,float64x2x4_t val)23994 vst4q_f64 (float64_t * __a, float64x2x4_t val)
23995 {
23996   __builtin_aarch64_simd_xi __o;
23997   __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
23998   __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
23999   __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
24000   __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
24001   __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
24002 }
24003 
24004 /* vsub */
24005 
24006 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
vsubd_s64(int64_t __a,int64_t __b)24007 vsubd_s64 (int64_t __a, int64_t __b)
24008 {
24009   return __a - __b;
24010 }
24011 
24012 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsubd_u64(uint64_t __a,uint64_t __b)24013 vsubd_u64 (uint64_t __a, uint64_t __b)
24014 {
24015   return __a - __b;
24016 }
24017 
24018 /* vtbx1  */
24019 
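/* On AArch64 the TBL instruction zeroes any lane whose index is out of
   range, whereas the AArch32 VTBX semantics require such lanes to keep
   the value of the destination operand __r.  The vtbx forms below
   therefore emulate VTBX: compare each index against the table size in
   bytes to build a mask of in-range lanes, perform the plain table
   lookup, then bit-select between the lookup result and __r.  */
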
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
			      vmov_n_u8 (8));
  int8x8_t __tbl = vtbl1_s8 (__tab, __idx);

  return vbsl_s8 (__mask, __tbl, __r);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
  uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);

  return vbsl_u8 (__mask, __tbl, __r);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
  poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);

  return vbsl_p8 (__mask, __tbl, __r);
}

/* vtbx3  */

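/* Same VTBX emulation as vtbx1 above; with three 8-byte table
   registers the in-range bound becomes 24.  */
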
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
			      vmov_n_u8 (24));
  int8x8_t __tbl = vtbl3_s8 (__tab, __idx);

  return vbsl_s8 (__mask, __tbl, __r);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
  uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);

  return vbsl_u8 (__mask, __tbl, __r);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
  poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);

  return vbsl_p8 (__mask, __tbl, __r);
}

/* vtbx4  */

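/* With four 8-byte tables, the halves are combined into two Q
   registers and passed to the native tbx builtin, which itself leaves
   out-of-range lanes untouched, so no separate mask-and-select step is
   needed.  */
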
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]);
  temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]);
  temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
						  (int8x8_t)__idx);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  __builtin_aarch64_simd_oi __o;
  temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]);
  temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o,
					   (int8x16_t) temp.val[1], 1);
  result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
						  (int8x8_t)__idx);
  return result;
}

/* vtrn */

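/* vtrn1 interleaves the even-indexed lanes of the two inputs and vtrn2
   the odd-indexed ones; e.g. for 8-lane inputs a and b:

     vtrn1: {a0, b0, a2, b2, a4, b4, a6, b6}
     vtrn2: {a1, b1, a3, b3, a5, b5, a7, b7}

   The __AARCH64EB__ variants of the shuffle masks compensate for the
   reversed lane numbering used on big-endian targets.  */
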
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn1_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn1_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn1_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn1_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn1_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn1_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn1_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn1_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn1_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn1q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn1q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn1q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn1q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn1q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn1q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn2_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn2_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn2_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn2_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn2_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn2_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn2_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn2_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn2_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn2q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn2q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn2q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn2q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn2q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn2q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vtrn_f32 (float32x2_t a, float32x2_t b)
{
  return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
}

__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vtrn_p8 (poly8x8_t a, poly8x8_t b)
{
  return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
}

__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vtrn_p16 (poly16x4_t a, poly16x4_t b)
{
  return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
}

__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vtrn_s8 (int8x8_t a, int8x8_t b)
{
  return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
}

__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vtrn_s16 (int16x4_t a, int16x4_t b)
{
  return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
}

__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vtrn_s32 (int32x2_t a, int32x2_t b)
{
  return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
}

__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vtrn_u8 (uint8x8_t a, uint8x8_t b)
{
  return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
}

__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vtrn_u16 (uint16x4_t a, uint16x4_t b)
{
  return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
}

__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vtrn_u32 (uint32x2_t a, uint32x2_t b)
{
  return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
}

__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vtrnq_f32 (float32x4_t a, float32x4_t b)
{
  return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
}

__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vtrnq_p8 (poly8x16_t a, poly8x16_t b)
{
  return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
}

__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vtrnq_p16 (poly16x8_t a, poly16x8_t b)
{
  return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
}

__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vtrnq_s8 (int8x16_t a, int8x16_t b)
{
  return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
}

__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vtrnq_s16 (int16x8_t a, int16x8_t b)
{
  return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
}

__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vtrnq_s32 (int32x4_t a, int32x4_t b)
{
  return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
}

__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vtrnq_u8 (uint8x16_t a, uint8x16_t b)
{
  return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
}

__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vtrnq_u16 (uint16x8_t a, uint16x8_t b)
{
  return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
}

__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vtrnq_u32 (uint32x4_t a, uint32x4_t b)
{
  return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
}

/* vtst */

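/* vtst sets every bit of a result lane when the bitwise AND of the
   corresponding input lanes is non-zero, and clears it otherwise.
   For example, vtst_u8 with lanes 0x01 and 0x01 yields 0xff, while
   lanes 0x02 and 0x01 yield 0x00.  */
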
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) ((__a & __b) != 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) ((__a & __b) != 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) ((__a & __b) != 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0));
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return ((__a & __b) != __AARCH64_UINT64_C (0));
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) ((__a & __b) != 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) ((__a & __b) != 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) ((__a & __b) != 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0));
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return ((__a & __b) != __AARCH64_UINT64_C (0));
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vtstd_s64 (int64_t __a, int64_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vtstd_u64 (uint64_t __a, uint64_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}

/* vuqadd */

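/* vuqadd is a signed saturating addition of an unsigned operand to a
   signed accumulator (the SUQADD instruction); the result is clamped
   to the signed range of the element, so e.g. vuqaddb_s8 (100, 200)
   saturates to 127.  */
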
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
{
  return __builtin_aarch64_suqaddv8qi_ssu (__a,  __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
{
  return __builtin_aarch64_suqaddv4hi_ssu (__a,  __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
{
  return __builtin_aarch64_suqaddv2si_ssu (__a,  __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
{
  return __builtin_aarch64_suqaddv16qi_ssu (__a,  __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
{
  return __builtin_aarch64_suqaddv8hi_ssu (__a,  __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
{
  return __builtin_aarch64_suqaddv4si_ssu (__a,  __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
{
  return __builtin_aarch64_suqaddv2di_ssu (__a,  __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vuqaddb_s8 (int8_t __a, uint8_t __b)
{
  return __builtin_aarch64_suqaddqi_ssu (__a,  __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vuqaddh_s16 (int16_t __a, uint16_t __b)
{
  return __builtin_aarch64_suqaddhi_ssu (__a,  __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vuqadds_s32 (int32_t __a, uint32_t __b)
{
  return __builtin_aarch64_suqaddsi_ssu (__a,  __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vuqaddd_s64 (int64_t __a, uint64_t __b)
{
  return __builtin_aarch64_suqadddi_ssu (__a,  __b);
}

#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) 		\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b)			\
  {									\
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b),	\
		      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)};	\
  }

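/* For illustration, __DEFINTERLEAVE (zip, int8x8x2_t, int8x8_t, s8,)
   expands to the equivalent of:

     int8x8x2_t vzip_s8 (int8x8_t a, int8x8_t b)
     { return (int8x8x2_t) {vzip1_s8 (a, b), vzip2_s8 (a, b)}; }

   __INTERLEAVE_LIST below instantiates this for every element type in
   both the 64-bit and 128-bit (Q) vector widths.  */
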
#define __INTERLEAVE_LIST(op)					\
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)	\
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)		\
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)		\
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)		\
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)		\
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)		\
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)		\
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)		\
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)		\
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)	\
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)		\
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)	\
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)		\
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)		\
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)		\
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)		\
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)	\
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)

/* vuzp */

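/* vuzp1 extracts the even-indexed lanes of the concatenation a:b and
   vuzp2 the odd-indexed ones; e.g. for 8-lane inputs:

     vuzp1: {a0, a2, a4, a6, b0, b2, b4, b6}
     vuzp2: {a1, a3, a5, a7, b1, b3, b5, b7}  */
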
24887 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp1_f32(float32x2_t __a,float32x2_t __b)24888 vuzp1_f32 (float32x2_t __a, float32x2_t __b)
24889 {
24890 #ifdef __AARCH64EB__
24891   return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24892 #else
24893   return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24894 #endif
24895 }
24896 
24897 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp1_p8(poly8x8_t __a,poly8x8_t __b)24898 vuzp1_p8 (poly8x8_t __a, poly8x8_t __b)
24899 {
24900 #ifdef __AARCH64EB__
24901   return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24902 #else
24903   return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24904 #endif
24905 }
24906 
24907 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp1_p16(poly16x4_t __a,poly16x4_t __b)24908 vuzp1_p16 (poly16x4_t __a, poly16x4_t __b)
24909 {
24910 #ifdef __AARCH64EB__
24911   return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
24912 #else
24913   return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
24914 #endif
24915 }
24916 
24917 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp1_s8(int8x8_t __a,int8x8_t __b)24918 vuzp1_s8 (int8x8_t __a, int8x8_t __b)
24919 {
24920 #ifdef __AARCH64EB__
24921   return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24922 #else
24923   return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24924 #endif
24925 }
24926 
24927 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp1_s16(int16x4_t __a,int16x4_t __b)24928 vuzp1_s16 (int16x4_t __a, int16x4_t __b)
24929 {
24930 #ifdef __AARCH64EB__
24931   return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
24932 #else
24933   return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
24934 #endif
24935 }
24936 
24937 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp1_s32(int32x2_t __a,int32x2_t __b)24938 vuzp1_s32 (int32x2_t __a, int32x2_t __b)
24939 {
24940 #ifdef __AARCH64EB__
24941   return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24942 #else
24943   return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24944 #endif
24945 }
24946 
24947 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp1_u8(uint8x8_t __a,uint8x8_t __b)24948 vuzp1_u8 (uint8x8_t __a, uint8x8_t __b)
24949 {
24950 #ifdef __AARCH64EB__
24951   return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24952 #else
24953   return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24954 #endif
24955 }
24956 
24957 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp1_u16(uint16x4_t __a,uint16x4_t __b)24958 vuzp1_u16 (uint16x4_t __a, uint16x4_t __b)
24959 {
24960 #ifdef __AARCH64EB__
24961   return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
24962 #else
24963   return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
24964 #endif
24965 }
24966 
24967 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp1_u32(uint32x2_t __a,uint32x2_t __b)24968 vuzp1_u32 (uint32x2_t __a, uint32x2_t __b)
24969 {
24970 #ifdef __AARCH64EB__
24971   return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24972 #else
24973   return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24974 #endif
24975 }
24976 
24977 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp1q_f32(float32x4_t __a,float32x4_t __b)24978 vuzp1q_f32 (float32x4_t __a, float32x4_t __b)
24979 {
24980 #ifdef __AARCH64EB__
24981   return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
24982 #else
24983   return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
24984 #endif
24985 }
24986 
24987 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp1q_f64(float64x2_t __a,float64x2_t __b)24988 vuzp1q_f64 (float64x2_t __a, float64x2_t __b)
24989 {
24990 #ifdef __AARCH64EB__
24991   return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24992 #else
24993   return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24994 #endif
24995 }
24996 
24997 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp1q_p8(poly8x16_t __a,poly8x16_t __b)24998 vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b)
24999 {
25000 #ifdef __AARCH64EB__
25001   return __builtin_shuffle (__a, __b, (uint8x16_t)
25002       {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
25003 #else
25004   return __builtin_shuffle (__a, __b, (uint8x16_t)
25005       {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
25006 #endif
25007 }
25008 
25009 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp1q_p16(poly16x8_t __a,poly16x8_t __b)25010 vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b)
25011 {
25012 #ifdef __AARCH64EB__
25013   return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25014 #else
25015   return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25016 #endif
25017 }
25018 
25019 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp1q_s8(int8x16_t __a,int8x16_t __b)25020 vuzp1q_s8 (int8x16_t __a, int8x16_t __b)
25021 {
25022 #ifdef __AARCH64EB__
25023   return __builtin_shuffle (__a, __b,
25024       (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
25025 #else
25026   return __builtin_shuffle (__a, __b,
25027       (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
25028 #endif
25029 }
25030 
25031 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp1q_s16(int16x8_t __a,int16x8_t __b)25032 vuzp1q_s16 (int16x8_t __a, int16x8_t __b)
25033 {
25034 #ifdef __AARCH64EB__
25035   return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25036 #else
25037   return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25038 #endif
25039 }
25040 
25041 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp1q_s32(int32x4_t __a,int32x4_t __b)25042 vuzp1q_s32 (int32x4_t __a, int32x4_t __b)
25043 {
25044 #ifdef __AARCH64EB__
25045   return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
25046 #else
25047   return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
25048 #endif
25049 }
25050 
25051 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp1q_s64(int64x2_t __a,int64x2_t __b)25052 vuzp1q_s64 (int64x2_t __a, int64x2_t __b)
25053 {
25054 #ifdef __AARCH64EB__
25055   return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25056 #else
25057   return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25058 #endif
25059 }
25060 
25061 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp1q_u8(uint8x16_t __a,uint8x16_t __b)25062 vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b)
25063 {
25064 #ifdef __AARCH64EB__
25065   return __builtin_shuffle (__a, __b,
25066       (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
25067 #else
25068   return __builtin_shuffle (__a, __b,
25069       (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
25070 #endif
25071 }
25072 
25073 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp1q_u16(uint16x8_t __a,uint16x8_t __b)25074 vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b)
25075 {
25076 #ifdef __AARCH64EB__
25077   return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25078 #else
25079   return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25080 #endif
25081 }
25082 
25083 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp1q_u32(uint32x4_t __a,uint32x4_t __b)25084 vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b)
25085 {
25086 #ifdef __AARCH64EB__
25087   return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
25088 #else
25089   return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
25090 #endif
25091 }
25092 
25093 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp1q_u64(uint64x2_t __a,uint64x2_t __b)25094 vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
25095 {
25096 #ifdef __AARCH64EB__
25097   return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25098 #else
25099   return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25100 #endif
25101 }
25102 
25103 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp2_f32(float32x2_t __a,float32x2_t __b)25104 vuzp2_f32 (float32x2_t __a, float32x2_t __b)
25105 {
25106 #ifdef __AARCH64EB__
25107   return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25108 #else
25109   return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25110 #endif
25111 }
25112 
25113 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp2_p8(poly8x8_t __a,poly8x8_t __b)25114 vuzp2_p8 (poly8x8_t __a, poly8x8_t __b)
25115 {
25116 #ifdef __AARCH64EB__
25117   return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25118 #else
25119   return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25120 #endif
25121 }
25122 
25123 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp2_p16(poly16x4_t __a,poly16x4_t __b)25124 vuzp2_p16 (poly16x4_t __a, poly16x4_t __b)
25125 {
25126 #ifdef __AARCH64EB__
25127   return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
25128 #else
25129   return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
25130 #endif
25131 }
25132 
25133 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp2_s8(int8x8_t __a,int8x8_t __b)25134 vuzp2_s8 (int8x8_t __a, int8x8_t __b)
25135 {
25136 #ifdef __AARCH64EB__
25137   return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25138 #else
25139   return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25140 #endif
25141 }
25142 
25143 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp2_s16(int16x4_t __a,int16x4_t __b)25144 vuzp2_s16 (int16x4_t __a, int16x4_t __b)
25145 {
25146 #ifdef __AARCH64EB__
25147   return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
25148 #else
25149   return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
25150 #endif
25151 }
25152 
25153 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp2_s32(int32x2_t __a,int32x2_t __b)25154 vuzp2_s32 (int32x2_t __a, int32x2_t __b)
25155 {
25156 #ifdef __AARCH64EB__
25157   return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25158 #else
25159   return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25160 #endif
25161 }
25162 
25163 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp2_u8(uint8x8_t __a,uint8x8_t __b)25164 vuzp2_u8 (uint8x8_t __a, uint8x8_t __b)
25165 {
25166 #ifdef __AARCH64EB__
25167   return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25168 #else
25169   return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25170 #endif
25171 }
25172 
25173 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp2_u16(uint16x4_t __a,uint16x4_t __b)25174 vuzp2_u16 (uint16x4_t __a, uint16x4_t __b)
25175 {
25176 #ifdef __AARCH64EB__
25177   return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
25178 #else
25179   return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
25180 #endif
25181 }
25182 
25183 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp2_u32(uint32x2_t __a,uint32x2_t __b)25184 vuzp2_u32 (uint32x2_t __a, uint32x2_t __b)
25185 {
25186 #ifdef __AARCH64EB__
25187   return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25188 #else
25189   return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25190 #endif
25191 }
25192 
25193 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp2q_f32(float32x4_t __a,float32x4_t __b)25194 vuzp2q_f32 (float32x4_t __a, float32x4_t __b)
25195 {
25196 #ifdef __AARCH64EB__
25197   return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
25198 #else
25199   return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
25200 #endif
25201 }

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp2q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp2q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__INTERLEAVE_LIST (uzp)
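
/* __INTERLEAVE_LIST (defined earlier in this file, via __DEFINTERLEAVE)
   additionally provides the composite forms, e.g. vuzpq_s32, each of
   which packs the matching op1 and op2 results into an x2 structure.  */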

/* vzip */
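
/* zip1 interleaves the low halves of its operands and zip2 (further
   below) the high halves.  Illustrative example: with
   __a = {a0, a1, a2, a3} and __b = {b0, b1, b2, b3},
   vzip1q_s32 (__a, __b) yields {a0, b0, a1, b1} and
   vzip2q_s32 (__a, __b) yields {a2, b2, a3, b3}.  */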

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vzip1_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vzip1_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vzip1_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vzip1_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vzip1_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vzip1_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vzip1_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vzip1_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vzip1_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vzip1q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vzip1q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vzip1q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vzip1q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vzip1q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vzip1q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vzip1q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vzip1q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vzip1q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vzip1q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vzip1q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vzip2_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vzip2_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vzip2_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vzip2_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vzip2_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vzip2_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vzip2_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vzip2_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vzip2_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vzip2q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vzip2q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vzip2q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vzip2q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vzip2q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vzip2q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vzip2q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vzip2q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vzip2q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vzip2q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vzip2q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__INTERLEAVE_LIST (zip)
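
/* Usage sketch (hypothetical caller code, not part of this header).
   Merging two separate byte planes into interleaved pairs:

     #include <arm_neon.h>

     static inline uint8x16x2_t
     interleave_bytes (uint8x16_t r, uint8x16_t g)
     {
       uint8x16x2_t rg;
       rg.val[0] = vzip1q_u8 (r, g);   // r0,g0,r1,g1,...,r7,g7
       rg.val[1] = vzip2q_u8 (r, g);   // r8,g8,r9,g9,...,r15,g15
       return rg;
     }

   The composite vzipq_u8 generated by __INTERLEAVE_LIST above computes
   the same pair of results in a single call.  */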

#undef __INTERLEAVE_LIST
#undef __DEFINTERLEAVE

/* End of optimal implementations in approved order.  */

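/* Tear down the internal __aarch64_* helper macros used above so they
   do not leak into code that includes this header.  */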
#undef __aarch64_vget_lane_any

#undef __aarch64_vdup_lane_any
#undef __aarch64_vdup_lane_f32
#undef __aarch64_vdup_lane_f64
#undef __aarch64_vdup_lane_p8
#undef __aarch64_vdup_lane_p16
#undef __aarch64_vdup_lane_s8
#undef __aarch64_vdup_lane_s16
#undef __aarch64_vdup_lane_s32
#undef __aarch64_vdup_lane_s64
#undef __aarch64_vdup_lane_u8
#undef __aarch64_vdup_lane_u16
#undef __aarch64_vdup_lane_u32
#undef __aarch64_vdup_lane_u64
#undef __aarch64_vdup_laneq_f32
#undef __aarch64_vdup_laneq_f64
#undef __aarch64_vdup_laneq_p8
#undef __aarch64_vdup_laneq_p16
#undef __aarch64_vdup_laneq_s8
#undef __aarch64_vdup_laneq_s16
#undef __aarch64_vdup_laneq_s32
#undef __aarch64_vdup_laneq_s64
#undef __aarch64_vdup_laneq_u8
#undef __aarch64_vdup_laneq_u16
#undef __aarch64_vdup_laneq_u32
#undef __aarch64_vdup_laneq_u64
#undef __aarch64_vdupq_lane_f32
#undef __aarch64_vdupq_lane_f64
#undef __aarch64_vdupq_lane_p8
#undef __aarch64_vdupq_lane_p16
#undef __aarch64_vdupq_lane_s8
#undef __aarch64_vdupq_lane_s16
#undef __aarch64_vdupq_lane_s32
#undef __aarch64_vdupq_lane_s64
#undef __aarch64_vdupq_lane_u8
#undef __aarch64_vdupq_lane_u16
#undef __aarch64_vdupq_lane_u32
#undef __aarch64_vdupq_lane_u64
#undef __aarch64_vdupq_laneq_f32
#undef __aarch64_vdupq_laneq_f64
#undef __aarch64_vdupq_laneq_p8
#undef __aarch64_vdupq_laneq_p16
#undef __aarch64_vdupq_laneq_s8
#undef __aarch64_vdupq_laneq_s16
#undef __aarch64_vdupq_laneq_s32
#undef __aarch64_vdupq_laneq_s64
#undef __aarch64_vdupq_laneq_u8
#undef __aarch64_vdupq_laneq_u16
#undef __aarch64_vdupq_laneq_u32
#undef __aarch64_vdupq_laneq_u64

#pragma GCC pop_options

#endif