1 /* ARM NEON intrinsics include file.
2 
3    Copyright (C) 2011-2013 Free Software Foundation, Inc.
4    Contributed by ARM Ltd.
5 
6    This file is part of GCC.
7 
8    GCC is free software; you can redistribute it and/or modify it
9    under the terms of the GNU General Public License as published
10    by the Free Software Foundation; either version 3, or (at your
11    option) any later version.
12 
13    GCC is distributed in the hope that it will be useful, but WITHOUT
14    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16    License for more details.
17 
18    Under Section 7 of GPL version 3, you are granted additional
19    permissions described in the GCC Runtime Library Exception, version
20    3.1, as published by the Free Software Foundation.
21 
22    You should have received a copy of the GNU General Public License and
23    a copy of the GCC Runtime Library Exception along with this program;
24    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
25    <http://www.gnu.org/licenses/>.  */
26 
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
29 
30 #include <stdint.h>
31 
/* 64-bit ("D" register) vector types.  The element types are the
   AArch64 SIMD builtin scalar types supplied by the compiler.  */
typedef __builtin_aarch64_simd_qi int8x8_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_hi int16x4_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_si int32x2_t
  __attribute__ ((__vector_size__ (8)));
/* Single-lane "vector" types are represented as plain scalars in this
   implementation.  */
typedef int64_t int64x1_t;
typedef int32_t int32x1_t;
typedef int16_t int16x1_t;
typedef int8_t int8x1_t;
typedef double float64x1_t;
typedef __builtin_aarch64_simd_sf float32x2_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_poly8 poly8x8_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_poly16 poly16x4_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_uqi uint8x8_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_uhi uint16x4_t
  __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_usi uint32x2_t
  __attribute__ ((__vector_size__ (8)));
typedef uint64_t uint64x1_t;
typedef uint32_t uint32x1_t;
typedef uint16_t uint16x1_t;
typedef uint8_t uint8x1_t;
/* 128-bit ("Q" register) vector types.  */
typedef __builtin_aarch64_simd_qi int8x16_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_hi int16x8_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_si int32x4_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_di int64x2_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_sf float32x4_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_df float64x2_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_poly8 poly8x16_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_poly16 poly16x8_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_uqi uint8x16_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_uhi uint16x8_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_usi uint32x4_t
  __attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_udi uint64x2_t
  __attribute__ ((__vector_size__ (16)));

/* Scalar element types used by the lane and scalar intrinsics.  */
typedef float float32_t;
typedef double float64_t;
typedef __builtin_aarch64_simd_poly8 poly8_t;
typedef __builtin_aarch64_simd_poly16 poly16_t;
88 
/* Aggregates of two vectors of the same type, used by the intrinsics
   that operate on pairs of registers (e.g. the two-register load/store
   and interleave families).  */
typedef struct int8x8x2_t
{
  int8x8_t val[2];
} int8x8x2_t;

typedef struct int8x16x2_t
{
  int8x16_t val[2];
} int8x16x2_t;

typedef struct int16x4x2_t
{
  int16x4_t val[2];
} int16x4x2_t;

typedef struct int16x8x2_t
{
  int16x8_t val[2];
} int16x8x2_t;

typedef struct int32x2x2_t
{
  int32x2_t val[2];
} int32x2x2_t;

typedef struct int32x4x2_t
{
  int32x4_t val[2];
} int32x4x2_t;

typedef struct int64x1x2_t
{
  int64x1_t val[2];
} int64x1x2_t;

typedef struct int64x2x2_t
{
  int64x2_t val[2];
} int64x2x2_t;

typedef struct uint8x8x2_t
{
  uint8x8_t val[2];
} uint8x8x2_t;

typedef struct uint8x16x2_t
{
  uint8x16_t val[2];
} uint8x16x2_t;

typedef struct uint16x4x2_t
{
  uint16x4_t val[2];
} uint16x4x2_t;

typedef struct uint16x8x2_t
{
  uint16x8_t val[2];
} uint16x8x2_t;

typedef struct uint32x2x2_t
{
  uint32x2_t val[2];
} uint32x2x2_t;

typedef struct uint32x4x2_t
{
  uint32x4_t val[2];
} uint32x4x2_t;

typedef struct uint64x1x2_t
{
  uint64x1_t val[2];
} uint64x1x2_t;

typedef struct uint64x2x2_t
{
  uint64x2_t val[2];
} uint64x2x2_t;

typedef struct float32x2x2_t
{
  float32x2_t val[2];
} float32x2x2_t;

typedef struct float32x4x2_t
{
  float32x4_t val[2];
} float32x4x2_t;

typedef struct float64x2x2_t
{
  float64x2_t val[2];
} float64x2x2_t;

typedef struct float64x1x2_t
{
  float64x1_t val[2];
} float64x1x2_t;

typedef struct poly8x8x2_t
{
  poly8x8_t val[2];
} poly8x8x2_t;

typedef struct poly8x16x2_t
{
  poly8x16_t val[2];
} poly8x16x2_t;

typedef struct poly16x4x2_t
{
  poly16x4_t val[2];
} poly16x4x2_t;

typedef struct poly16x8x2_t
{
  poly16x8_t val[2];
} poly16x8x2_t;
208 
/* Aggregates of three vectors of the same type, used by the intrinsics
   that operate on triples of registers (e.g. the three-register
   load/store families).  */
typedef struct int8x8x3_t
{
  int8x8_t val[3];
} int8x8x3_t;

typedef struct int8x16x3_t
{
  int8x16_t val[3];
} int8x16x3_t;

typedef struct int16x4x3_t
{
  int16x4_t val[3];
} int16x4x3_t;

typedef struct int16x8x3_t
{
  int16x8_t val[3];
} int16x8x3_t;

typedef struct int32x2x3_t
{
  int32x2_t val[3];
} int32x2x3_t;

typedef struct int32x4x3_t
{
  int32x4_t val[3];
} int32x4x3_t;

typedef struct int64x1x3_t
{
  int64x1_t val[3];
} int64x1x3_t;

typedef struct int64x2x3_t
{
  int64x2_t val[3];
} int64x2x3_t;

typedef struct uint8x8x3_t
{
  uint8x8_t val[3];
} uint8x8x3_t;

typedef struct uint8x16x3_t
{
  uint8x16_t val[3];
} uint8x16x3_t;

typedef struct uint16x4x3_t
{
  uint16x4_t val[3];
} uint16x4x3_t;

typedef struct uint16x8x3_t
{
  uint16x8_t val[3];
} uint16x8x3_t;

typedef struct uint32x2x3_t
{
  uint32x2_t val[3];
} uint32x2x3_t;

typedef struct uint32x4x3_t
{
  uint32x4_t val[3];
} uint32x4x3_t;

typedef struct uint64x1x3_t
{
  uint64x1_t val[3];
} uint64x1x3_t;

typedef struct uint64x2x3_t
{
  uint64x2_t val[3];
} uint64x2x3_t;

typedef struct float32x2x3_t
{
  float32x2_t val[3];
} float32x2x3_t;

typedef struct float32x4x3_t
{
  float32x4_t val[3];
} float32x4x3_t;

typedef struct float64x2x3_t
{
  float64x2_t val[3];
} float64x2x3_t;

typedef struct float64x1x3_t
{
  float64x1_t val[3];
} float64x1x3_t;

typedef struct poly8x8x3_t
{
  poly8x8_t val[3];
} poly8x8x3_t;

typedef struct poly8x16x3_t
{
  poly8x16_t val[3];
} poly8x16x3_t;

typedef struct poly16x4x3_t
{
  poly16x4_t val[3];
} poly16x4x3_t;

typedef struct poly16x8x3_t
{
  poly16x8_t val[3];
} poly16x8x3_t;
328 
/* Aggregates of four vectors of the same type, used by the intrinsics
   that operate on quadruples of registers (e.g. the four-register
   load/store families).  */
typedef struct int8x8x4_t
{
  int8x8_t val[4];
} int8x8x4_t;

typedef struct int8x16x4_t
{
  int8x16_t val[4];
} int8x16x4_t;

typedef struct int16x4x4_t
{
  int16x4_t val[4];
} int16x4x4_t;

typedef struct int16x8x4_t
{
  int16x8_t val[4];
} int16x8x4_t;

typedef struct int32x2x4_t
{
  int32x2_t val[4];
} int32x2x4_t;

typedef struct int32x4x4_t
{
  int32x4_t val[4];
} int32x4x4_t;

typedef struct int64x1x4_t
{
  int64x1_t val[4];
} int64x1x4_t;

typedef struct int64x2x4_t
{
  int64x2_t val[4];
} int64x2x4_t;

typedef struct uint8x8x4_t
{
  uint8x8_t val[4];
} uint8x8x4_t;

typedef struct uint8x16x4_t
{
  uint8x16_t val[4];
} uint8x16x4_t;

typedef struct uint16x4x4_t
{
  uint16x4_t val[4];
} uint16x4x4_t;

typedef struct uint16x8x4_t
{
  uint16x8_t val[4];
} uint16x8x4_t;

typedef struct uint32x2x4_t
{
  uint32x2_t val[4];
} uint32x2x4_t;

typedef struct uint32x4x4_t
{
  uint32x4_t val[4];
} uint32x4x4_t;

typedef struct uint64x1x4_t
{
  uint64x1_t val[4];
} uint64x1x4_t;

typedef struct uint64x2x4_t
{
  uint64x2_t val[4];
} uint64x2x4_t;

typedef struct float32x2x4_t
{
  float32x2_t val[4];
} float32x2x4_t;

typedef struct float32x4x4_t
{
  float32x4_t val[4];
} float32x4x4_t;

typedef struct float64x2x4_t
{
  float64x2_t val[4];
} float64x2x4_t;

typedef struct float64x1x4_t
{
  float64x1_t val[4];
} float64x1x4_t;

typedef struct poly8x8x4_t
{
  poly8x8_t val[4];
} poly8x8x4_t;

typedef struct poly8x16x4_t
{
  poly8x16_t val[4];
} poly8x16x4_t;

typedef struct poly16x4x4_t
{
  poly16x4_t val[4];
} poly16x4x4_t;

typedef struct poly16x8x4_t
{
  poly16x8_t val[4];
} poly16x8x4_t;
448 
449 
/* vadd_<t> / vaddq_<t>: lane-wise addition of two vectors of the same
   type.  Implemented directly with GNU C vector arithmetic, so integer
   lanes wrap modulo the lane width.  The "q" variants operate on
   128-bit vectors.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vadd_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a + __b;
}

/* The 64x1 variants are scalar additions (the x1 types are plain
   integers in this implementation).  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vadd_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a + __b;
}

/* 128-bit ("Q" register) variants.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a + __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vaddq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vaddq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a + __b;
}
563 
/* vaddl_<t>: long (widening) add.  Each lane of the two narrow inputs
   is widened to double width before the addition, so the result cannot
   overflow.  The unsigned variants cast through the signed vector
   types because the underlying builtins are declared on signed
   vectors; the casts only reinterpret bits.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

/* vaddl_high_<t>: as vaddl, but widening the upper halves of two
   128-bit inputs.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}
641 
/* vaddw_<t>: wide add.  Each lane of the narrow second operand is
   widened to the lane width of the first operand before addition.
   The casts on the unsigned variants reinterpret to the signed vector
   types expected by the builtins.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
						   (int32x2_t) __b);
}

/* vaddw_high_<t>: as vaddw, but widening the upper half of a 128-bit
   second operand.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
						    (int32x4_t) __b);
}
719 
/* vhadd_<t> / vhaddq_<t>: halving add.  Each result lane is
   (a + b) >> 1 computed without intermediate overflow (truncating
   toward negative infinity).  The unsigned variants cast through the
   signed vector types required by the builtin prototypes.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

/* 128-bit ("Q" register) variants.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}
797 
/* vrhadd_<t> / vrhaddq_<t>: rounding halving add.  Each result lane is
   (a + b + 1) >> 1 computed without intermediate overflow.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
						    (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
						    (int32x2_t) __b);
}

/* 128-bit ("Q" register) variants.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}
875 
/* vaddhn_<t>: add and narrow, returning the high half of each
   double-width sum (a truncating narrow).  The unsigned variants share
   the signed builtins via bit-reinterpreting casts, which is safe
   because the high half of a wrapping add is sign-agnostic.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vaddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vaddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vaddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}

/* vraddhn_<t>: as vaddhn, but the narrowing rounds (adds half of the
   discarded low part before truncation).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vraddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vraddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vraddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
						    (int64x2_t) __b);
}
953 
/* vaddhn_high_<t> / vraddhn_high_<t>: add-and-narrow into the upper
   half of a 128-bit result.  __a supplies the lower half of the
   result; the (rounded, for the "r" variants) high-narrow of
   __b + __c fills the upper half.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
						    (int16x8_t) __b,
						    (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
						    (int32x4_t) __b,
						    (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
						    (int64x2_t) __b,
						    (int64x2_t) __c);
}

/* Rounding variants.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
						     (int16x8_t) __b,
						     (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
						     (int32x4_t) __b,
						     (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
						     (int64x2_t) __b,
						     (int64x2_t) __c);
}
1037 
/* vdiv/vdivq -- lane-wise floating-point division, expressed with GCC's
   native vector '/' operator; no builtin is needed.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdiv_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a / __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdivq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a / __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdivq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a / __b;
}
1055 
/* vmul_* -- lane-wise multiply on 64-bit vectors, using GCC's native
   vector '*' operator for the integer and float element types.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmul_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmul_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a * __b;
}

/* Polynomial (carry-less) multiply must go through the pmul builtin;
   the casts reinterpret poly8 lanes as int8 and back.  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmul_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}
1104 
/* vmulq_* -- lane-wise multiply on 128-bit vectors; same pattern as the
   vmul_* family above (native '*' operator, plus the pmul builtin for
   the polynomial type).  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmulq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a * __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a * __b;
}

/* Polynomial multiply: builtin call with bit-reinterpreting casts.  */

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}
1159 
/* vand/vandq -- lane-wise bitwise AND for every 64-bit (vand_*) and
   128-bit (vandq_*) integer vector type, via GCC's native vector '&'
   operator.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vand_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vand_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vand_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vand_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vand_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vand_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & __b;
}

/* The 64x1 variants operate on scalar typedefs (int64x1_t/uint64x1_t
   are plain 64-bit integers per the typedefs at the top of this file),
   so '&' here is ordinary scalar AND.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vand_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vand_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vandq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vandq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vandq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vandq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vandq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vandq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vandq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vandq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & __b;
}
1255 
/* vorr/vorrq -- lane-wise bitwise inclusive OR for every 64-bit and
   128-bit integer vector type, via GCC's native vector '|' operator
   (scalar '|' for the 64x1 typedefs).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vorr_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vorr_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vorr_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vorr_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vorr_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vorr_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vorr_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vorr_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vorrq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vorrq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vorrq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vorrq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | __b;
}
1351 
/* veor/veorq -- lane-wise bitwise exclusive OR for every 64-bit and
   128-bit integer vector type, via GCC's native vector '^' operator
   (scalar '^' for the 64x1 typedefs).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
veor_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
veor_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
veor_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
veor_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
veor_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
veor_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
veor_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
veor_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
veorq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
veorq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
veorq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
veorq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
veorq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
veorq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
veorq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
veorq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a ^ __b;
}
1447 
/* vbic/vbicq -- lane-wise "bit clear": each result lane is
   __a & ~__b, i.e. the bits of __a with every bit that is set in __b
   cleared.  Note the operand roles: __b is the mask of bits to clear,
   not the value being masked.  Implemented with native vector
   operators (scalar for the 64x1 typedefs).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vbic_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vbic_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vbic_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vbic_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vbic_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vbic_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vbic_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vbic_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vbicq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vbicq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vbicq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vbicq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & ~__b;
}
1543 
/* vorn/vornq -- lane-wise "OR NOT": each result lane is __a | ~__b
   (the complement applies to the second operand only).  Implemented
   with native vector operators (scalar for the 64x1 typedefs).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vorn_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vorn_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vorn_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vorn_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vorn_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vorn_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vorn_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vorn_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vornq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vornq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vornq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vornq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vornq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vornq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vornq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vornq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | ~__b;
}
1639 
/* vsub/vsubq -- lane-wise subtraction (__a - __b) for every 64-bit and
   128-bit element type, via GCC's native vector '-' operator (scalar
   '-' for the 64x1 typedefs).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vsub_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a - __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsubq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vsubq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a - __b;
}
1753 
/* vsubl_* -- long (widening) subtract: the signatures show each result
   lane is twice the width of the input lanes (e.g. int8x8_t operands
   give an int16x8_t result), computed by the ssubl/usubl builtins.
   The vsubl_high_* forms take full 128-bit operands and, per the
   "...2" builtin naming (ACLE vsubl_high), operate on the upper halves
   of __a and __b.  Unsigned variants cast through the signed vector
   types; the casts only reinterpret bits.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}
1831 
/* vsubw_* -- wide subtract: the signatures show a wide first operand
   and a narrow second operand (e.g. int16x8_t minus int8x8_t yielding
   int16x8_t), handled by the ssubw/usubw builtins.  The vsubw_high_*
   forms take a full 128-bit narrow operand and, per the "...2" builtin
   naming (ACLE vsubw_high), use its upper half.  Unsigned variants
   cast through the signed vector types; the casts only reinterpret
   bits.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
						    (int32x4_t) __b);
}
1909 
1910 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqadd_s8(int8x8_t __a,int8x8_t __b)1911 vqadd_s8 (int8x8_t __a, int8x8_t __b)
1912 {
1913   return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
1914 }
1915 
1916 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqadd_s16(int16x4_t __a,int16x4_t __b)1917 vqadd_s16 (int16x4_t __a, int16x4_t __b)
1918 {
1919   return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
1920 }
1921 
1922 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqadd_s32(int32x2_t __a,int32x2_t __b)1923 vqadd_s32 (int32x2_t __a, int32x2_t __b)
1924 {
1925   return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
1926 }
1927 
1928 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqadd_s64(int64x1_t __a,int64x1_t __b)1929 vqadd_s64 (int64x1_t __a, int64x1_t __b)
1930 {
1931   return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
1932 }
1933 
1934 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqadd_u8(uint8x8_t __a,uint8x8_t __b)1935 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
1936 {
1937   return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
1938 						  (int8x8_t) __b);
1939 }
1940 
1941 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqadd_u16(uint16x4_t __a,uint16x4_t __b)1942 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
1943 {
1944   return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
1945 						   (int16x4_t) __b);
1946 }
1947 
1948 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqadd_u32(uint32x2_t __a,uint32x2_t __b)1949 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
1950 {
1951   return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
1952 						   (int32x2_t) __b);
1953 }
1954 
1955 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqadd_u64(uint64x1_t __a,uint64x1_t __b)1956 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
1957 {
1958   return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
1959 						 (int64x1_t) __b);
1960 }
1961 
1962 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqaddq_s8(int8x16_t __a,int8x16_t __b)1963 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
1964 {
1965   return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
1966 }
1967 
1968 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqaddq_s16(int16x8_t __a,int16x8_t __b)1969 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
1970 {
1971   return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
1972 }
1973 
1974 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqaddq_s32(int32x4_t __a,int32x4_t __b)1975 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
1976 {
1977   return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
1978 }
1979 
1980 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqaddq_s64(int64x2_t __a,int64x2_t __b)1981 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
1982 {
1983   return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
1984 }
1985 
1986 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqaddq_u8(uint8x16_t __a,uint8x16_t __b)1987 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1988 {
1989   return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
1990 						    (int8x16_t) __b);
1991 }
1992 
1993 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqaddq_u16(uint16x8_t __a,uint16x8_t __b)1994 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1995 {
1996   return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
1997 						   (int16x8_t) __b);
1998 }
1999 
2000 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqaddq_u32(uint32x4_t __a,uint32x4_t __b)2001 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2002 {
2003   return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
2004 						   (int32x4_t) __b);
2005 }
2006 
2007 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqaddq_u64(uint64x2_t __a,uint64x2_t __b)2008 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2009 {
2010   return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
2011 						   (int64x2_t) __b);
2012 }
2013 
2014 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqsub_s8(int8x8_t __a,int8x8_t __b)2015 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2016 {
2017   return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2018 }
2019 
2020 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqsub_s16(int16x4_t __a,int16x4_t __b)2021 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2022 {
2023   return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2024 }
2025 
2026 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqsub_s32(int32x2_t __a,int32x2_t __b)2027 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2028 {
2029   return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2030 }
2031 
2032 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqsub_s64(int64x1_t __a,int64x1_t __b)2033 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2034 {
2035   return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
2036 }
2037 
2038 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqsub_u8(uint8x8_t __a,uint8x8_t __b)2039 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2040 {
2041   return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
2042 						  (int8x8_t) __b);
2043 }
2044 
2045 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqsub_u16(uint16x4_t __a,uint16x4_t __b)2046 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2047 {
2048   return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
2049 						   (int16x4_t) __b);
2050 }
2051 
2052 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqsub_u32(uint32x2_t __a,uint32x2_t __b)2053 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2054 {
2055   return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
2056 						   (int32x2_t) __b);
2057 }
2058 
2059 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqsub_u64(uint64x1_t __a,uint64x1_t __b)2060 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2061 {
2062   return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
2063 						 (int64x1_t) __b);
2064 }
2065 
2066 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqsubq_s8(int8x16_t __a,int8x16_t __b)2067 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2068 {
2069   return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2070 }
2071 
2072 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqsubq_s16(int16x8_t __a,int16x8_t __b)2073 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2074 {
2075   return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2076 }
2077 
2078 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqsubq_s32(int32x4_t __a,int32x4_t __b)2079 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2080 {
2081   return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2082 }
2083 
2084 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqsubq_s64(int64x2_t __a,int64x2_t __b)2085 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2086 {
2087   return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2088 }
2089 
2090 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqsubq_u8(uint8x16_t __a,uint8x16_t __b)2091 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2092 {
2093   return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
2094 						    (int8x16_t) __b);
2095 }
2096 
2097 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqsubq_u16(uint16x8_t __a,uint16x8_t __b)2098 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2099 {
2100   return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a,
2101 						   (int16x8_t) __b);
2102 }
2103 
2104 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqsubq_u32(uint32x4_t __a,uint32x4_t __b)2105 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2106 {
2107   return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a,
2108 						   (int32x4_t) __b);
2109 }
2110 
2111 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqsubq_u64(uint64x2_t __a,uint64x2_t __b)2112 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2113 {
2114   return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a,
2115 						   (int64x2_t) __b);
2116 }
2117 
2118 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqneg_s8(int8x8_t __a)2119 vqneg_s8 (int8x8_t __a)
2120 {
2121   return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2122 }
2123 
2124 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqneg_s16(int16x4_t __a)2125 vqneg_s16 (int16x4_t __a)
2126 {
2127   return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2128 }
2129 
2130 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqneg_s32(int32x2_t __a)2131 vqneg_s32 (int32x2_t __a)
2132 {
2133   return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2134 }
2135 
2136 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqnegq_s8(int8x16_t __a)2137 vqnegq_s8 (int8x16_t __a)
2138 {
2139   return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2140 }
2141 
2142 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqnegq_s16(int16x8_t __a)2143 vqnegq_s16 (int16x8_t __a)
2144 {
2145   return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2146 }
2147 
2148 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqnegq_s32(int32x4_t __a)2149 vqnegq_s32 (int32x4_t __a)
2150 {
2151   return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2152 }
2153 
2154 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqabs_s8(int8x8_t __a)2155 vqabs_s8 (int8x8_t __a)
2156 {
2157   return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2158 }
2159 
2160 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqabs_s16(int16x4_t __a)2161 vqabs_s16 (int16x4_t __a)
2162 {
2163   return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2164 }
2165 
2166 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqabs_s32(int32x2_t __a)2167 vqabs_s32 (int32x2_t __a)
2168 {
2169   return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2170 }
2171 
2172 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqabsq_s8(int8x16_t __a)2173 vqabsq_s8 (int8x16_t __a)
2174 {
2175   return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2176 }
2177 
2178 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqabsq_s16(int16x8_t __a)2179 vqabsq_s16 (int16x8_t __a)
2180 {
2181   return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2182 }
2183 
2184 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqabsq_s32(int32x4_t __a)2185 vqabsq_s32 (int32x4_t __a)
2186 {
2187   return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2188 }
2189 
2190 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_s16(int16x4_t __a,int16x4_t __b)2191 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2192 {
2193   return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2194 }
2195 
2196 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_s32(int32x2_t __a,int32x2_t __b)2197 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2198 {
2199   return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2200 }
2201 
2202 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_s16(int16x8_t __a,int16x8_t __b)2203 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2204 {
2205   return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2206 }
2207 
2208 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_s32(int32x4_t __a,int32x4_t __b)2209 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2210 {
2211   return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2212 }
2213 
2214 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_s16(int16x4_t __a,int16x4_t __b)2215 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2216 {
2217   return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2218 }
2219 
2220 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_s32(int32x2_t __a,int32x2_t __b)2221 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2222 {
2223   return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2224 }
2225 
2226 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_s16(int16x8_t __a,int16x8_t __b)2227 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2228 {
2229   return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2230 }
2231 
2232 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_s32(int32x4_t __a,int32x4_t __b)2233 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2234 {
2235   return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2236 }
2237 
2238 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcreate_s8(uint64_t __a)2239 vcreate_s8 (uint64_t __a)
2240 {
2241   return (int8x8_t) __a;
2242 }
2243 
2244 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcreate_s16(uint64_t __a)2245 vcreate_s16 (uint64_t __a)
2246 {
2247   return (int16x4_t) __a;
2248 }
2249 
2250 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcreate_s32(uint64_t __a)2251 vcreate_s32 (uint64_t __a)
2252 {
2253   return (int32x2_t) __a;
2254 }
2255 
2256 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vcreate_s64(uint64_t __a)2257 vcreate_s64 (uint64_t __a)
2258 {
2259   return (int64x1_t) __a;
2260 }
2261 
2262 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcreate_f32(uint64_t __a)2263 vcreate_f32 (uint64_t __a)
2264 {
2265   return (float32x2_t) __a;
2266 }
2267 
2268 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcreate_u8(uint64_t __a)2269 vcreate_u8 (uint64_t __a)
2270 {
2271   return (uint8x8_t) __a;
2272 }
2273 
2274 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcreate_u16(uint64_t __a)2275 vcreate_u16 (uint64_t __a)
2276 {
2277   return (uint16x4_t) __a;
2278 }
2279 
2280 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcreate_u32(uint64_t __a)2281 vcreate_u32 (uint64_t __a)
2282 {
2283   return (uint32x2_t) __a;
2284 }
2285 
2286 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcreate_u64(uint64_t __a)2287 vcreate_u64 (uint64_t __a)
2288 {
2289   return (uint64x1_t) __a;
2290 }
2291 
2292 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vcreate_f64(uint64_t __a)2293 vcreate_f64 (uint64_t __a)
2294 {
2295   return (float64x1_t) __builtin_aarch64_createdf (__a);
2296 }
2297 
2298 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcreate_p8(uint64_t __a)2299 vcreate_p8 (uint64_t __a)
2300 {
2301   return (poly8x8_t) __a;
2302 }
2303 
2304 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vcreate_p16(uint64_t __a)2305 vcreate_p16 (uint64_t __a)
2306 {
2307   return (poly16x4_t) __a;
2308 }
2309 
2310 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
vget_lane_s8(int8x8_t __a,const int __b)2311 vget_lane_s8 (int8x8_t __a, const int __b)
2312 {
2313   return (int8_t) __builtin_aarch64_get_lane_signedv8qi (__a, __b);
2314 }
2315 
2316 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
vget_lane_s16(int16x4_t __a,const int __b)2317 vget_lane_s16 (int16x4_t __a, const int __b)
2318 {
2319   return (int16_t) __builtin_aarch64_get_lane_signedv4hi (__a, __b);
2320 }
2321 
2322 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
vget_lane_s32(int32x2_t __a,const int __b)2323 vget_lane_s32 (int32x2_t __a, const int __b)
2324 {
2325   return (int32_t) __builtin_aarch64_get_lane_signedv2si (__a, __b);
2326 }
2327 
2328 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vget_lane_f32(float32x2_t __a,const int __b)2329 vget_lane_f32 (float32x2_t __a, const int __b)
2330 {
2331   return (float32_t) __builtin_aarch64_get_lanev2sf (__a, __b);
2332 }
2333 
2334 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vget_lane_u8(uint8x8_t __a,const int __b)2335 vget_lane_u8 (uint8x8_t __a, const int __b)
2336 {
2337   return (uint8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a,
2338 							    __b);
2339 }
2340 
2341 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vget_lane_u16(uint16x4_t __a,const int __b)2342 vget_lane_u16 (uint16x4_t __a, const int __b)
2343 {
2344   return (uint16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a,
2345 							     __b);
2346 }
2347 
2348 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vget_lane_u32(uint32x2_t __a,const int __b)2349 vget_lane_u32 (uint32x2_t __a, const int __b)
2350 {
2351   return (uint32_t) __builtin_aarch64_get_lane_unsignedv2si ((int32x2_t) __a,
2352 							     __b);
2353 }
2354 
2355 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vget_lane_p8(poly8x8_t __a,const int __b)2356 vget_lane_p8 (poly8x8_t __a, const int __b)
2357 {
2358   return (poly8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a,
2359 							    __b);
2360 }
2361 
2362 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vget_lane_p16(poly16x4_t __a,const int __b)2363 vget_lane_p16 (poly16x4_t __a, const int __b)
2364 {
2365   return (poly16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a,
2366 							     __b);
2367 }
2368 
2369 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
vget_lane_s64(int64x1_t __a,const int __b)2370 vget_lane_s64 (int64x1_t __a, const int __b)
2371 {
2372   return (int64_t) __builtin_aarch64_get_lanedi (__a, __b);
2373 }
2374 
2375 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vget_lane_u64(uint64x1_t __a,const int __b)2376 vget_lane_u64 (uint64x1_t __a, const int __b)
2377 {
2378   return (uint64_t) __builtin_aarch64_get_lanedi ((int64x1_t) __a, __b);
2379 }
2380 
2381 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
vgetq_lane_s8(int8x16_t __a,const int __b)2382 vgetq_lane_s8 (int8x16_t __a, const int __b)
2383 {
2384   return (int8_t) __builtin_aarch64_get_lane_signedv16qi (__a, __b);
2385 }
2386 
2387 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
vgetq_lane_s16(int16x8_t __a,const int __b)2388 vgetq_lane_s16 (int16x8_t __a, const int __b)
2389 {
2390   return (int16_t) __builtin_aarch64_get_lane_signedv8hi (__a, __b);
2391 }
2392 
2393 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
vgetq_lane_s32(int32x4_t __a,const int __b)2394 vgetq_lane_s32 (int32x4_t __a, const int __b)
2395 {
2396   return (int32_t) __builtin_aarch64_get_lane_signedv4si (__a, __b);
2397 }
2398 
2399 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vgetq_lane_f32(float32x4_t __a,const int __b)2400 vgetq_lane_f32 (float32x4_t __a, const int __b)
2401 {
2402   return (float32_t) __builtin_aarch64_get_lanev4sf (__a, __b);
2403 }
2404 
2405 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vgetq_lane_f64(float64x2_t __a,const int __b)2406 vgetq_lane_f64 (float64x2_t __a, const int __b)
2407 {
2408   return (float64_t) __builtin_aarch64_get_lanev2df (__a, __b);
2409 }
2410 
2411 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vgetq_lane_u8(uint8x16_t __a,const int __b)2412 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2413 {
2414   return (uint8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a,
2415 							     __b);
2416 }
2417 
2418 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vgetq_lane_u16(uint16x8_t __a,const int __b)2419 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2420 {
2421   return (uint16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a,
2422 							     __b);
2423 }
2424 
2425 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vgetq_lane_u32(uint32x4_t __a,const int __b)2426 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2427 {
2428   return (uint32_t) __builtin_aarch64_get_lane_unsignedv4si ((int32x4_t) __a,
2429 							     __b);
2430 }
2431 
2432 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vgetq_lane_p8(poly8x16_t __a,const int __b)2433 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2434 {
2435   return (poly8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a,
2436 							     __b);
2437 }
2438 
2439 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vgetq_lane_p16(poly16x8_t __a,const int __b)2440 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2441 {
2442   return (poly16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a,
2443 							     __b);
2444 }
2445 
2446 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
vgetq_lane_s64(int64x2_t __a,const int __b)2447 vgetq_lane_s64 (int64x2_t __a, const int __b)
2448 {
2449   return __builtin_aarch64_get_lane_unsignedv2di (__a, __b);
2450 }
2451 
2452 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vgetq_lane_u64(uint64x2_t __a,const int __b)2453 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2454 {
2455   return (uint64_t) __builtin_aarch64_get_lane_unsignedv2di ((int64x2_t) __a,
2456 							     __b);
2457 }
2458 
2459 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s8(int8x8_t __a)2460 vreinterpret_p8_s8 (int8x8_t __a)
2461 {
2462   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
2463 }
2464 
2465 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s16(int16x4_t __a)2466 vreinterpret_p8_s16 (int16x4_t __a)
2467 {
2468   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
2469 }
2470 
2471 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s32(int32x2_t __a)2472 vreinterpret_p8_s32 (int32x2_t __a)
2473 {
2474   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
2475 }
2476 
2477 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s64(int64x1_t __a)2478 vreinterpret_p8_s64 (int64x1_t __a)
2479 {
2480   return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
2481 }
2482 
2483 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_f32(float32x2_t __a)2484 vreinterpret_p8_f32 (float32x2_t __a)
2485 {
2486   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
2487 }
2488 
2489 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_u8(uint8x8_t __a)2490 vreinterpret_p8_u8 (uint8x8_t __a)
2491 {
2492   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
2493 }
2494 
2495 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_u16(uint16x4_t __a)2496 vreinterpret_p8_u16 (uint16x4_t __a)
2497 {
2498   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2499 }
2500 
2501 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_u32(uint32x2_t __a)2502 vreinterpret_p8_u32 (uint32x2_t __a)
2503 {
2504   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
2505 }
2506 
2507 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_u64(uint64x1_t __a)2508 vreinterpret_p8_u64 (uint64x1_t __a)
2509 {
2510   return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
2511 }
2512 
2513 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_p16(poly16x4_t __a)2514 vreinterpret_p8_p16 (poly16x4_t __a)
2515 {
2516   return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2517 }
2518 
2519 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s8(int8x16_t __a)2520 vreinterpretq_p8_s8 (int8x16_t __a)
2521 {
2522   return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
2523 }
2524 
2525 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s16(int16x8_t __a)2526 vreinterpretq_p8_s16 (int16x8_t __a)
2527 {
2528   return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
2529 }
2530 
2531 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s32(int32x4_t __a)2532 vreinterpretq_p8_s32 (int32x4_t __a)
2533 {
2534   return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
2535 }
2536 
2537 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s64(int64x2_t __a)2538 vreinterpretq_p8_s64 (int64x2_t __a)
2539 {
2540   return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
2541 }
2542 
2543 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_f32(float32x4_t __a)2544 vreinterpretq_p8_f32 (float32x4_t __a)
2545 {
2546   return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
2547 }
2548 
2549 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u8(uint8x16_t __a)2550 vreinterpretq_p8_u8 (uint8x16_t __a)
2551 {
2552   return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
2553 							       __a);
2554 }
2555 
2556 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u16(uint16x8_t __a)2557 vreinterpretq_p8_u16 (uint16x8_t __a)
2558 {
2559   return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2560 							      __a);
2561 }
2562 
2563 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u32(uint32x4_t __a)2564 vreinterpretq_p8_u32 (uint32x4_t __a)
2565 {
2566   return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
2567 							      __a);
2568 }
2569 
2570 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u64(uint64x2_t __a)2571 vreinterpretq_p8_u64 (uint64x2_t __a)
2572 {
2573   return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
2574 							      __a);
2575 }
2576 
2577 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_p16(poly16x8_t __a)2578 vreinterpretq_p8_p16 (poly16x8_t __a)
2579 {
2580   return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2581 							      __a);
2582 }
2583 
2584 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s8(int8x8_t __a)2585 vreinterpret_p16_s8 (int8x8_t __a)
2586 {
2587   return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
2588 }
2589 
2590 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s16(int16x4_t __a)2591 vreinterpret_p16_s16 (int16x4_t __a)
2592 {
2593   return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
2594 }
2595 
2596 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s32(int32x2_t __a)2597 vreinterpret_p16_s32 (int32x2_t __a)
2598 {
2599   return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
2600 }
2601 
2602 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s64(int64x1_t __a)2603 vreinterpret_p16_s64 (int64x1_t __a)
2604 {
2605   return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
2606 }
2607 
2608 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_f32(float32x2_t __a)2609 vreinterpret_p16_f32 (float32x2_t __a)
2610 {
2611   return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
2612 }
2613 
2614 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u8(uint8x8_t __a)2615 vreinterpret_p16_u8 (uint8x8_t __a)
2616 {
2617   return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2618 }
2619 
2620 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u16(uint16x4_t __a)2621 vreinterpret_p16_u16 (uint16x4_t __a)
2622 {
2623   return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
2624 }
2625 
2626 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u32(uint32x2_t __a)2627 vreinterpret_p16_u32 (uint32x2_t __a)
2628 {
2629   return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
2630 }
2631 
2632 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u64(uint64x1_t __a)2633 vreinterpret_p16_u64 (uint64x1_t __a)
2634 {
2635   return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
2636 }
2637 
2638 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_p8(poly8x8_t __a)2639 vreinterpret_p16_p8 (poly8x8_t __a)
2640 {
2641   return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2642 }
2643 
2644 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s8(int8x16_t __a)2645 vreinterpretq_p16_s8 (int8x16_t __a)
2646 {
2647   return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
2648 }
2649 
2650 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s16(int16x8_t __a)2651 vreinterpretq_p16_s16 (int16x8_t __a)
2652 {
2653   return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
2654 }
2655 
2656 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s32(int32x4_t __a)2657 vreinterpretq_p16_s32 (int32x4_t __a)
2658 {
2659   return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
2660 }
2661 
2662 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s64(int64x2_t __a)2663 vreinterpretq_p16_s64 (int64x2_t __a)
2664 {
2665   return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
2666 }
2667 
2668 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_f32(float32x4_t __a)2669 vreinterpretq_p16_f32 (float32x4_t __a)
2670 {
2671   return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
2672 }
2673 
2674 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u8(uint8x16_t __a)2675 vreinterpretq_p16_u8 (uint8x16_t __a)
2676 {
2677   return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2678 							      __a);
2679 }
2680 
2681 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u16(uint16x8_t __a)2682 vreinterpretq_p16_u16 (uint16x8_t __a)
2683 {
2684   return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
2685 }
2686 
2687 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u32(uint32x4_t __a)2688 vreinterpretq_p16_u32 (uint32x4_t __a)
2689 {
2690   return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
2691 }
2692 
2693 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u64(uint64x2_t __a)2694 vreinterpretq_p16_u64 (uint64x2_t __a)
2695 {
2696   return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
2697 }
2698 
2699 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_p8(poly8x16_t __a)2700 vreinterpretq_p16_p8 (poly8x16_t __a)
2701 {
2702   return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2703 							      __a);
2704 }
2705 
2706 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s8(int8x8_t __a)2707 vreinterpret_f32_s8 (int8x8_t __a)
2708 {
2709   return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a);
2710 }
2711 
2712 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s16(int16x4_t __a)2713 vreinterpret_f32_s16 (int16x4_t __a)
2714 {
2715   return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a);
2716 }
2717 
2718 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s32(int32x2_t __a)2719 vreinterpret_f32_s32 (int32x2_t __a)
2720 {
2721   return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a);
2722 }
2723 
2724 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s64(int64x1_t __a)2725 vreinterpret_f32_s64 (int64x1_t __a)
2726 {
2727   return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a);
2728 }
2729 
2730 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u8(uint8x8_t __a)2731 vreinterpret_f32_u8 (uint8x8_t __a)
2732 {
2733   return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2734 }
2735 
2736 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u16(uint16x4_t __a)2737 vreinterpret_f32_u16 (uint16x4_t __a)
2738 {
2739   return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2740 							      __a);
2741 }
2742 
2743 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u32(uint32x2_t __a)2744 vreinterpret_f32_u32 (uint32x2_t __a)
2745 {
2746   return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t)
2747 							      __a);
2748 }
2749 
2750 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u64(uint64x1_t __a)2751 vreinterpret_f32_u64 (uint64x1_t __a)
2752 {
2753   return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a);
2754 }
2755 
2756 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_p8(poly8x8_t __a)2757 vreinterpret_f32_p8 (poly8x8_t __a)
2758 {
2759   return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2760 }
2761 
2762 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_p16(poly16x4_t __a)2763 vreinterpret_f32_p16 (poly16x4_t __a)
2764 {
2765   return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2766 							      __a);
2767 }
2768 
2769 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s8(int8x16_t __a)2770 vreinterpretq_f32_s8 (int8x16_t __a)
2771 {
2772   return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a);
2773 }
2774 
2775 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s16(int16x8_t __a)2776 vreinterpretq_f32_s16 (int16x8_t __a)
2777 {
2778   return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a);
2779 }
2780 
2781 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s32(int32x4_t __a)2782 vreinterpretq_f32_s32 (int32x4_t __a)
2783 {
2784   return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a);
2785 }
2786 
2787 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s64(int64x2_t __a)2788 vreinterpretq_f32_s64 (int64x2_t __a)
2789 {
2790   return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a);
2791 }
2792 
2793 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u8(uint8x16_t __a)2794 vreinterpretq_f32_u8 (uint8x16_t __a)
2795 {
2796   return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2797 							       __a);
2798 }
2799 
2800 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u16(uint16x8_t __a)2801 vreinterpretq_f32_u16 (uint16x8_t __a)
2802 {
2803   return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2804 							      __a);
2805 }
2806 
2807 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u32(uint32x4_t __a)2808 vreinterpretq_f32_u32 (uint32x4_t __a)
2809 {
2810   return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t)
2811 							      __a);
2812 }
2813 
2814 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u64(uint64x2_t __a)2815 vreinterpretq_f32_u64 (uint64x2_t __a)
2816 {
2817   return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t)
2818 							      __a);
2819 }
2820 
2821 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_p8(poly8x16_t __a)2822 vreinterpretq_f32_p8 (poly8x16_t __a)
2823 {
2824   return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2825 							       __a);
2826 }
2827 
2828 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_p16(poly16x8_t __a)2829 vreinterpretq_f32_p16 (poly16x8_t __a)
2830 {
2831   return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2832 							      __a);
2833 }
2834 
2835 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_s8(int8x8_t __a)2836 vreinterpret_s64_s8 (int8x8_t __a)
2837 {
2838   return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
2839 }
2840 
2841 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_s16(int16x4_t __a)2842 vreinterpret_s64_s16 (int16x4_t __a)
2843 {
2844   return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
2845 }
2846 
2847 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_s32(int32x2_t __a)2848 vreinterpret_s64_s32 (int32x2_t __a)
2849 {
2850   return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
2851 }
2852 
2853 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_f32(float32x2_t __a)2854 vreinterpret_s64_f32 (float32x2_t __a)
2855 {
2856   return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
2857 }
2858 
2859 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u8(uint8x8_t __a)2860 vreinterpret_s64_u8 (uint8x8_t __a)
2861 {
2862   return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
2863 }
2864 
2865 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u16(uint16x4_t __a)2866 vreinterpret_s64_u16 (uint16x4_t __a)
2867 {
2868   return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
2869 }
2870 
2871 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u32(uint32x2_t __a)2872 vreinterpret_s64_u32 (uint32x2_t __a)
2873 {
2874   return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
2875 }
2876 
2877 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u64(uint64x1_t __a)2878 vreinterpret_s64_u64 (uint64x1_t __a)
2879 {
2880   return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a);
2881 }
2882 
2883 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_p8(poly8x8_t __a)2884 vreinterpret_s64_p8 (poly8x8_t __a)
2885 {
2886   return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
2887 }
2888 
2889 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_p16(poly16x4_t __a)2890 vreinterpret_s64_p16 (poly16x4_t __a)
2891 {
2892   return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
2893 }
2894 
2895 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s8(int8x16_t __a)2896 vreinterpretq_s64_s8 (int8x16_t __a)
2897 {
2898   return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
2899 }
2900 
2901 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s16(int16x8_t __a)2902 vreinterpretq_s64_s16 (int16x8_t __a)
2903 {
2904   return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
2905 }
2906 
2907 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s32(int32x4_t __a)2908 vreinterpretq_s64_s32 (int32x4_t __a)
2909 {
2910   return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
2911 }
2912 
2913 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_f32(float32x4_t __a)2914 vreinterpretq_s64_f32 (float32x4_t __a)
2915 {
2916   return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
2917 }
2918 
2919 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u8(uint8x16_t __a)2920 vreinterpretq_s64_u8 (uint8x16_t __a)
2921 {
2922   return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
2923 }
2924 
2925 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u16(uint16x8_t __a)2926 vreinterpretq_s64_u16 (uint16x8_t __a)
2927 {
2928   return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
2929 }
2930 
2931 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u32(uint32x4_t __a)2932 vreinterpretq_s64_u32 (uint32x4_t __a)
2933 {
2934   return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
2935 }
2936 
2937 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u64(uint64x2_t __a)2938 vreinterpretq_s64_u64 (uint64x2_t __a)
2939 {
2940   return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a);
2941 }
2942 
2943 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p8(poly8x16_t __a)2944 vreinterpretq_s64_p8 (poly8x16_t __a)
2945 {
2946   return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
2947 }
2948 
2949 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p16(poly16x8_t __a)2950 vreinterpretq_s64_p16 (poly16x8_t __a)
2951 {
2952   return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
2953 }
2954 
2955 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s8(int8x8_t __a)2956 vreinterpret_u64_s8 (int8x8_t __a)
2957 {
2958   return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
2959 }
2960 
2961 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s16(int16x4_t __a)2962 vreinterpret_u64_s16 (int16x4_t __a)
2963 {
2964   return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
2965 }
2966 
2967 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s32(int32x2_t __a)2968 vreinterpret_u64_s32 (int32x2_t __a)
2969 {
2970   return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
2971 }
2972 
2973 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s64(int64x1_t __a)2974 vreinterpret_u64_s64 (int64x1_t __a)
2975 {
2976   return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a);
2977 }
2978 
2979 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_f32(float32x2_t __a)2980 vreinterpret_u64_f32 (float32x2_t __a)
2981 {
2982   return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
2983 }
2984 
2985 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u8(uint8x8_t __a)2986 vreinterpret_u64_u8 (uint8x8_t __a)
2987 {
2988   return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
2989 }
2990 
2991 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u16(uint16x4_t __a)2992 vreinterpret_u64_u16 (uint16x4_t __a)
2993 {
2994   return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
2995 }
2996 
2997 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u32(uint32x2_t __a)2998 vreinterpret_u64_u32 (uint32x2_t __a)
2999 {
3000   return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3001 }
3002 
3003 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_p8(poly8x8_t __a)3004 vreinterpret_u64_p8 (poly8x8_t __a)
3005 {
3006   return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3007 }
3008 
3009 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_p16(poly16x4_t __a)3010 vreinterpret_u64_p16 (poly16x4_t __a)
3011 {
3012   return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3013 }
3014 
3015 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s8(int8x16_t __a)3016 vreinterpretq_u64_s8 (int8x16_t __a)
3017 {
3018   return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3019 }
3020 
3021 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s16(int16x8_t __a)3022 vreinterpretq_u64_s16 (int16x8_t __a)
3023 {
3024   return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3025 }
3026 
3027 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s32(int32x4_t __a)3028 vreinterpretq_u64_s32 (int32x4_t __a)
3029 {
3030   return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3031 }
3032 
3033 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s64(int64x2_t __a)3034 vreinterpretq_u64_s64 (int64x2_t __a)
3035 {
3036   return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a);
3037 }
3038 
3039 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_f32(float32x4_t __a)3040 vreinterpretq_u64_f32 (float32x4_t __a)
3041 {
3042   return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3043 }
3044 
3045 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u8(uint8x16_t __a)3046 vreinterpretq_u64_u8 (uint8x16_t __a)
3047 {
3048   return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3049 							      __a);
3050 }
3051 
3052 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u16(uint16x8_t __a)3053 vreinterpretq_u64_u16 (uint16x8_t __a)
3054 {
3055   return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3056 }
3057 
3058 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u32(uint32x4_t __a)3059 vreinterpretq_u64_u32 (uint32x4_t __a)
3060 {
3061   return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3062 }
3063 
3064 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p8(poly8x16_t __a)3065 vreinterpretq_u64_p8 (poly8x16_t __a)
3066 {
3067   return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3068 							      __a);
3069 }
3070 
3071 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p16(poly16x8_t __a)3072 vreinterpretq_u64_p16 (poly16x8_t __a)
3073 {
3074   return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3075 }
3076 
3077 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s16(int16x4_t __a)3078 vreinterpret_s8_s16 (int16x4_t __a)
3079 {
3080   return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3081 }
3082 
3083 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s32(int32x2_t __a)3084 vreinterpret_s8_s32 (int32x2_t __a)
3085 {
3086   return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3087 }
3088 
3089 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s64(int64x1_t __a)3090 vreinterpret_s8_s64 (int64x1_t __a)
3091 {
3092   return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3093 }
3094 
3095 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_f32(float32x2_t __a)3096 vreinterpret_s8_f32 (float32x2_t __a)
3097 {
3098   return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3099 }
3100 
3101 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u8(uint8x8_t __a)3102 vreinterpret_s8_u8 (uint8x8_t __a)
3103 {
3104   return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3105 }
3106 
3107 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u16(uint16x4_t __a)3108 vreinterpret_s8_u16 (uint16x4_t __a)
3109 {
3110   return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3111 }
3112 
3113 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u32(uint32x2_t __a)3114 vreinterpret_s8_u32 (uint32x2_t __a)
3115 {
3116   return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3117 }
3118 
3119 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u64(uint64x1_t __a)3120 vreinterpret_s8_u64 (uint64x1_t __a)
3121 {
3122   return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3123 }
3124 
3125 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_p8(poly8x8_t __a)3126 vreinterpret_s8_p8 (poly8x8_t __a)
3127 {
3128   return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3129 }
3130 
3131 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_p16(poly16x4_t __a)3132 vreinterpret_s8_p16 (poly16x4_t __a)
3133 {
3134   return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3135 }
3136 
3137 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s16(int16x8_t __a)3138 vreinterpretq_s8_s16 (int16x8_t __a)
3139 {
3140   return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3141 }
3142 
3143 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s32(int32x4_t __a)3144 vreinterpretq_s8_s32 (int32x4_t __a)
3145 {
3146   return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3147 }
3148 
3149 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s64(int64x2_t __a)3150 vreinterpretq_s8_s64 (int64x2_t __a)
3151 {
3152   return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3153 }
3154 
3155 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_f32(float32x4_t __a)3156 vreinterpretq_s8_f32 (float32x4_t __a)
3157 {
3158   return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3159 }
3160 
3161 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u8(uint8x16_t __a)3162 vreinterpretq_s8_u8 (uint8x16_t __a)
3163 {
3164   return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3165 							      __a);
3166 }
3167 
3168 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u16(uint16x8_t __a)3169 vreinterpretq_s8_u16 (uint16x8_t __a)
3170 {
3171   return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3172 }
3173 
3174 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u32(uint32x4_t __a)3175 vreinterpretq_s8_u32 (uint32x4_t __a)
3176 {
3177   return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a);
3178 }
3179 
3180 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u64(uint64x2_t __a)3181 vreinterpretq_s8_u64 (uint64x2_t __a)
3182 {
3183   return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a);
3184 }
3185 
3186 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_p8(poly8x16_t __a)3187 vreinterpretq_s8_p8 (poly8x16_t __a)
3188 {
3189   return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3190 							      __a);
3191 }
3192 
3193 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_p16(poly16x8_t __a)3194 vreinterpretq_s8_p16 (poly16x8_t __a)
3195 {
3196   return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3197 }
3198 
3199 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s8(int8x8_t __a)3200 vreinterpret_s16_s8 (int8x8_t __a)
3201 {
3202   return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3203 }
3204 
3205 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s32(int32x2_t __a)3206 vreinterpret_s16_s32 (int32x2_t __a)
3207 {
3208   return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3209 }
3210 
3211 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s64(int64x1_t __a)3212 vreinterpret_s16_s64 (int64x1_t __a)
3213 {
3214   return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3215 }
3216 
3217 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_f32(float32x2_t __a)3218 vreinterpret_s16_f32 (float32x2_t __a)
3219 {
3220   return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3221 }
3222 
3223 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u8(uint8x8_t __a)3224 vreinterpret_s16_u8 (uint8x8_t __a)
3225 {
3226   return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3227 }
3228 
3229 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u16(uint16x4_t __a)3230 vreinterpret_s16_u16 (uint16x4_t __a)
3231 {
3232   return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3233 }
3234 
3235 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u32(uint32x2_t __a)3236 vreinterpret_s16_u32 (uint32x2_t __a)
3237 {
3238   return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3239 }
3240 
3241 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u64(uint64x1_t __a)3242 vreinterpret_s16_u64 (uint64x1_t __a)
3243 {
3244   return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3245 }
3246 
3247 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_p8(poly8x8_t __a)3248 vreinterpret_s16_p8 (poly8x8_t __a)
3249 {
3250   return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3251 }
3252 
3253 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_p16(poly16x4_t __a)3254 vreinterpret_s16_p16 (poly16x4_t __a)
3255 {
3256   return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3257 }
3258 
3259 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s8(int8x16_t __a)3260 vreinterpretq_s16_s8 (int8x16_t __a)
3261 {
3262   return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3263 }
3264 
3265 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s32(int32x4_t __a)3266 vreinterpretq_s16_s32 (int32x4_t __a)
3267 {
3268   return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3269 }
3270 
3271 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s64(int64x2_t __a)3272 vreinterpretq_s16_s64 (int64x2_t __a)
3273 {
3274   return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3275 }
3276 
3277 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_f32(float32x4_t __a)3278 vreinterpretq_s16_f32 (float32x4_t __a)
3279 {
3280   return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3281 }
3282 
3283 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u8(uint8x16_t __a)3284 vreinterpretq_s16_u8 (uint8x16_t __a)
3285 {
3286   return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3287 }
3288 
3289 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u16(uint16x8_t __a)3290 vreinterpretq_s16_u16 (uint16x8_t __a)
3291 {
3292   return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3293 }
3294 
3295 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u32(uint32x4_t __a)3296 vreinterpretq_s16_u32 (uint32x4_t __a)
3297 {
3298   return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3299 }
3300 
3301 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u64(uint64x2_t __a)3302 vreinterpretq_s16_u64 (uint64x2_t __a)
3303 {
3304   return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3305 }
3306 
3307 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_p8(poly8x16_t __a)3308 vreinterpretq_s16_p8 (poly8x16_t __a)
3309 {
3310   return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3311 }
3312 
3313 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_p16(poly16x8_t __a)3314 vreinterpretq_s16_p16 (poly16x8_t __a)
3315 {
3316   return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3317 }
3318 
3319 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s8(int8x8_t __a)3320 vreinterpret_s32_s8 (int8x8_t __a)
3321 {
3322   return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3323 }
3324 
3325 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s16(int16x4_t __a)3326 vreinterpret_s32_s16 (int16x4_t __a)
3327 {
3328   return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3329 }
3330 
3331 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s64(int64x1_t __a)3332 vreinterpret_s32_s64 (int64x1_t __a)
3333 {
3334   return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3335 }
3336 
3337 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_f32(float32x2_t __a)3338 vreinterpret_s32_f32 (float32x2_t __a)
3339 {
3340   return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3341 }
3342 
3343 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u8(uint8x8_t __a)3344 vreinterpret_s32_u8 (uint8x8_t __a)
3345 {
3346   return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3347 }
3348 
3349 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u16(uint16x4_t __a)3350 vreinterpret_s32_u16 (uint16x4_t __a)
3351 {
3352   return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3353 }
3354 
3355 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u32(uint32x2_t __a)3356 vreinterpret_s32_u32 (uint32x2_t __a)
3357 {
3358   return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a);
3359 }
3360 
3361 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u64(uint64x1_t __a)3362 vreinterpret_s32_u64 (uint64x1_t __a)
3363 {
3364   return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3365 }
3366 
3367 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_p8(poly8x8_t __a)3368 vreinterpret_s32_p8 (poly8x8_t __a)
3369 {
3370   return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3371 }
3372 
3373 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_p16(poly16x4_t __a)3374 vreinterpret_s32_p16 (poly16x4_t __a)
3375 {
3376   return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3377 }
3378 
3379 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s8(int8x16_t __a)3380 vreinterpretq_s32_s8 (int8x16_t __a)
3381 {
3382   return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3383 }
3384 
3385 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s16(int16x8_t __a)3386 vreinterpretq_s32_s16 (int16x8_t __a)
3387 {
3388   return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3389 }
3390 
3391 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s64(int64x2_t __a)3392 vreinterpretq_s32_s64 (int64x2_t __a)
3393 {
3394   return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3395 }
3396 
3397 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_f32(float32x4_t __a)3398 vreinterpretq_s32_f32 (float32x4_t __a)
3399 {
3400   return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3401 }
3402 
3403 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u8(uint8x16_t __a)3404 vreinterpretq_s32_u8 (uint8x16_t __a)
3405 {
3406   return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3407 }
3408 
3409 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u16(uint16x8_t __a)3410 vreinterpretq_s32_u16 (uint16x8_t __a)
3411 {
3412   return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3413 }
3414 
3415 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u32(uint32x4_t __a)3416 vreinterpretq_s32_u32 (uint32x4_t __a)
3417 {
3418   return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a);
3419 }
3420 
3421 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u64(uint64x2_t __a)3422 vreinterpretq_s32_u64 (uint64x2_t __a)
3423 {
3424   return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3425 }
3426 
3427 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_p8(poly8x16_t __a)3428 vreinterpretq_s32_p8 (poly8x16_t __a)
3429 {
3430   return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3431 }
3432 
3433 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_p16(poly16x8_t __a)3434 vreinterpretq_s32_p16 (poly16x8_t __a)
3435 {
3436   return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3437 }
3438 
3439 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s8(int8x8_t __a)3440 vreinterpret_u8_s8 (int8x8_t __a)
3441 {
3442   return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
3443 }
3444 
3445 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s16(int16x4_t __a)3446 vreinterpret_u8_s16 (int16x4_t __a)
3447 {
3448   return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3449 }
3450 
3451 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s32(int32x2_t __a)3452 vreinterpret_u8_s32 (int32x2_t __a)
3453 {
3454   return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3455 }
3456 
3457 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s64(int64x1_t __a)3458 vreinterpret_u8_s64 (int64x1_t __a)
3459 {
3460   return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3461 }
3462 
3463 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_f32(float32x2_t __a)3464 vreinterpret_u8_f32 (float32x2_t __a)
3465 {
3466   return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3467 }
3468 
3469 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u16(uint16x4_t __a)3470 vreinterpret_u8_u16 (uint16x4_t __a)
3471 {
3472   return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3473 }
3474 
3475 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u32(uint32x2_t __a)3476 vreinterpret_u8_u32 (uint32x2_t __a)
3477 {
3478   return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3479 }
3480 
3481 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u64(uint64x1_t __a)3482 vreinterpret_u8_u64 (uint64x1_t __a)
3483 {
3484   return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3485 }
3486 
3487 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_p8(poly8x8_t __a)3488 vreinterpret_u8_p8 (poly8x8_t __a)
3489 {
3490   return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3491 }
3492 
3493 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_p16(poly16x4_t __a)3494 vreinterpret_u8_p16 (poly16x4_t __a)
3495 {
3496   return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3497 }
3498 
3499 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s8(int8x16_t __a)3500 vreinterpretq_u8_s8 (int8x16_t __a)
3501 {
3502   return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
3503 }
3504 
3505 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s16(int16x8_t __a)3506 vreinterpretq_u8_s16 (int16x8_t __a)
3507 {
3508   return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3509 }
3510 
3511 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s32(int32x4_t __a)3512 vreinterpretq_u8_s32 (int32x4_t __a)
3513 {
3514   return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3515 }
3516 
3517 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s64(int64x2_t __a)3518 vreinterpretq_u8_s64 (int64x2_t __a)
3519 {
3520   return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3521 }
3522 
3523 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_f32(float32x4_t __a)3524 vreinterpretq_u8_f32 (float32x4_t __a)
3525 {
3526   return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3527 }
3528 
3529 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u16(uint16x8_t __a)3530 vreinterpretq_u8_u16 (uint16x8_t __a)
3531 {
3532   return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3533 							      __a);
3534 }
3535 
3536 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u32(uint32x4_t __a)3537 vreinterpretq_u8_u32 (uint32x4_t __a)
3538 {
3539   return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
3540 							      __a);
3541 }
3542 
3543 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u64(uint64x2_t __a)3544 vreinterpretq_u8_u64 (uint64x2_t __a)
3545 {
3546   return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
3547 							      __a);
3548 }
3549 
3550 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_p8(poly8x16_t __a)3551 vreinterpretq_u8_p8 (poly8x16_t __a)
3552 {
3553   return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3554 							       __a);
3555 }
3556 
3557 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_p16(poly16x8_t __a)3558 vreinterpretq_u8_p16 (poly16x8_t __a)
3559 {
3560   return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3561 							      __a);
3562 }
3563 
3564 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s8(int8x8_t __a)3565 vreinterpret_u16_s8 (int8x8_t __a)
3566 {
3567   return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3568 }
3569 
3570 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s16(int16x4_t __a)3571 vreinterpret_u16_s16 (int16x4_t __a)
3572 {
3573   return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
3574 }
3575 
3576 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s32(int32x2_t __a)3577 vreinterpret_u16_s32 (int32x2_t __a)
3578 {
3579   return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3580 }
3581 
3582 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s64(int64x1_t __a)3583 vreinterpret_u16_s64 (int64x1_t __a)
3584 {
3585   return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3586 }
3587 
3588 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_f32(float32x2_t __a)3589 vreinterpret_u16_f32 (float32x2_t __a)
3590 {
3591   return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3592 }
3593 
3594 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u8(uint8x8_t __a)3595 vreinterpret_u16_u8 (uint8x8_t __a)
3596 {
3597   return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3598 }
3599 
3600 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u32(uint32x2_t __a)3601 vreinterpret_u16_u32 (uint32x2_t __a)
3602 {
3603   return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3604 }
3605 
3606 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u64(uint64x1_t __a)3607 vreinterpret_u16_u64 (uint64x1_t __a)
3608 {
3609   return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3610 }
3611 
3612 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_p8(poly8x8_t __a)3613 vreinterpret_u16_p8 (poly8x8_t __a)
3614 {
3615   return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3616 }
3617 
3618 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_p16(poly16x4_t __a)3619 vreinterpret_u16_p16 (poly16x4_t __a)
3620 {
3621   return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3622 }
3623 
3624 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s8(int8x16_t __a)3625 vreinterpretq_u16_s8 (int8x16_t __a)
3626 {
3627   return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3628 }
3629 
3630 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s16(int16x8_t __a)3631 vreinterpretq_u16_s16 (int16x8_t __a)
3632 {
3633   return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
3634 }
3635 
3636 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s32(int32x4_t __a)3637 vreinterpretq_u16_s32 (int32x4_t __a)
3638 {
3639   return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3640 }
3641 
3642 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s64(int64x2_t __a)3643 vreinterpretq_u16_s64 (int64x2_t __a)
3644 {
3645   return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3646 }
3647 
3648 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_f32(float32x4_t __a)3649 vreinterpretq_u16_f32 (float32x4_t __a)
3650 {
3651   return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3652 }
3653 
3654 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u8(uint8x16_t __a)3655 vreinterpretq_u16_u8 (uint8x16_t __a)
3656 {
3657   return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3658 							      __a);
3659 }
3660 
3661 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u32(uint32x4_t __a)3662 vreinterpretq_u16_u32 (uint32x4_t __a)
3663 {
3664   return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3665 }
3666 
3667 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u64(uint64x2_t __a)3668 vreinterpretq_u16_u64 (uint64x2_t __a)
3669 {
3670   return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3671 }
3672 
3673 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_p8(poly8x16_t __a)3674 vreinterpretq_u16_p8 (poly8x16_t __a)
3675 {
3676   return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3677 							      __a);
3678 }
3679 
3680 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_p16(poly16x8_t __a)3681 vreinterpretq_u16_p16 (poly16x8_t __a)
3682 {
3683   return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3684 }
3685 
3686 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s8(int8x8_t __a)3687 vreinterpret_u32_s8 (int8x8_t __a)
3688 {
3689   return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3690 }
3691 
3692 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s16(int16x4_t __a)3693 vreinterpret_u32_s16 (int16x4_t __a)
3694 {
3695   return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3696 }
3697 
3698 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s32(int32x2_t __a)3699 vreinterpret_u32_s32 (int32x2_t __a)
3700 {
3701   return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a);
3702 }
3703 
3704 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s64(int64x1_t __a)3705 vreinterpret_u32_s64 (int64x1_t __a)
3706 {
3707   return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3708 }
3709 
3710 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_f32(float32x2_t __a)3711 vreinterpret_u32_f32 (float32x2_t __a)
3712 {
3713   return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3714 }
3715 
3716 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u8(uint8x8_t __a)3717 vreinterpret_u32_u8 (uint8x8_t __a)
3718 {
3719   return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3720 }
3721 
3722 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u16(uint16x4_t __a)3723 vreinterpret_u32_u16 (uint16x4_t __a)
3724 {
3725   return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3726 }
3727 
3728 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u64(uint64x1_t __a)3729 vreinterpret_u32_u64 (uint64x1_t __a)
3730 {
3731   return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3732 }
3733 
3734 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_p8(poly8x8_t __a)3735 vreinterpret_u32_p8 (poly8x8_t __a)
3736 {
3737   return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3738 }
3739 
3740 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_p16(poly16x4_t __a)3741 vreinterpret_u32_p16 (poly16x4_t __a)
3742 {
3743   return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3744 }
3745 
3746 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s8(int8x16_t __a)3747 vreinterpretq_u32_s8 (int8x16_t __a)
3748 {
3749   return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3750 }
3751 
3752 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s16(int16x8_t __a)3753 vreinterpretq_u32_s16 (int16x8_t __a)
3754 {
3755   return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3756 }
3757 
3758 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s32(int32x4_t __a)3759 vreinterpretq_u32_s32 (int32x4_t __a)
3760 {
3761   return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a);
3762 }
3763 
3764 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s64(int64x2_t __a)3765 vreinterpretq_u32_s64 (int64x2_t __a)
3766 {
3767   return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3768 }
3769 
3770 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_f32(float32x4_t __a)3771 vreinterpretq_u32_f32 (float32x4_t __a)
3772 {
3773   return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3774 }
3775 
3776 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u8(uint8x16_t __a)3777 vreinterpretq_u32_u8 (uint8x16_t __a)
3778 {
3779   return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3780 							      __a);
3781 }
3782 
3783 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u16(uint16x8_t __a)3784 vreinterpretq_u32_u16 (uint16x8_t __a)
3785 {
3786   return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3787 }
3788 
3789 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u64(uint64x2_t __a)3790 vreinterpretq_u32_u64 (uint64x2_t __a)
3791 {
3792   return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3793 }
3794 
3795 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_p8(poly8x16_t __a)3796 vreinterpretq_u32_p8 (poly8x16_t __a)
3797 {
3798   return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3799 							      __a);
3800 }
3801 
3802 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_p16(poly16x8_t __a)3803 vreinterpretq_u32_p16 (poly16x8_t __a)
3804 {
3805   return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3806 }
3807 
3808 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcombine_s8(int8x8_t __a,int8x8_t __b)3809 vcombine_s8 (int8x8_t __a, int8x8_t __b)
3810 {
3811   return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
3812 }
3813 
3814 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vcombine_s16(int16x4_t __a,int16x4_t __b)3815 vcombine_s16 (int16x4_t __a, int16x4_t __b)
3816 {
3817   return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
3818 }
3819 
3820 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcombine_s32(int32x2_t __a,int32x2_t __b)3821 vcombine_s32 (int32x2_t __a, int32x2_t __b)
3822 {
3823   return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
3824 }
3825 
3826 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcombine_s64(int64x1_t __a,int64x1_t __b)3827 vcombine_s64 (int64x1_t __a, int64x1_t __b)
3828 {
3829   return (int64x2_t) __builtin_aarch64_combinedi (__a, __b);
3830 }
3831 
3832 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcombine_f32(float32x2_t __a,float32x2_t __b)3833 vcombine_f32 (float32x2_t __a, float32x2_t __b)
3834 {
3835   return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
3836 }
3837 
3838 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcombine_u8(uint8x8_t __a,uint8x8_t __b)3839 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
3840 {
3841   return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
3842 						     (int8x8_t) __b);
3843 }
3844 
3845 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcombine_u16(uint16x4_t __a,uint16x4_t __b)3846 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
3847 {
3848   return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
3849 						     (int16x4_t) __b);
3850 }
3851 
3852 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcombine_u32(uint32x2_t __a,uint32x2_t __b)3853 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
3854 {
3855   return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
3856 						     (int32x2_t) __b);
3857 }
3858 
3859 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcombine_u64(uint64x1_t __a,uint64x1_t __b)3860 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
3861 {
3862   return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a,
3863 						   (int64x1_t) __b);
3864 }
3865 
3866 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcombine_f64(float64x1_t __a,float64x1_t __b)3867 vcombine_f64 (float64x1_t __a, float64x1_t __b)
3868 {
3869   return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
3870 }
3871 
3872 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcombine_p8(poly8x8_t __a,poly8x8_t __b)3873 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
3874 {
3875   return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
3876 						     (int8x8_t) __b);
3877 }
3878 
3879 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vcombine_p16(poly16x4_t __a,poly16x4_t __b)3880 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
3881 {
3882   return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
3883 						     (int16x4_t) __b);
3884 }
3885 
/* Start of temporary inline asm implementations.  */

3888 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vaba_s8(int8x8_t a,int8x8_t b,int8x8_t c)3889 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
3890 {
3891   int8x8_t result;
3892   __asm__ ("saba %0.8b,%2.8b,%3.8b"
3893            : "=w"(result)
3894            : "0"(a), "w"(b), "w"(c)
3895            : /* No clobbers */);
3896   return result;
3897 }
3898 
3899 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vaba_s16(int16x4_t a,int16x4_t b,int16x4_t c)3900 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
3901 {
3902   int16x4_t result;
3903   __asm__ ("saba %0.4h,%2.4h,%3.4h"
3904            : "=w"(result)
3905            : "0"(a), "w"(b), "w"(c)
3906            : /* No clobbers */);
3907   return result;
3908 }
3909 
3910 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vaba_s32(int32x2_t a,int32x2_t b,int32x2_t c)3911 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
3912 {
3913   int32x2_t result;
3914   __asm__ ("saba %0.2s,%2.2s,%3.2s"
3915            : "=w"(result)
3916            : "0"(a), "w"(b), "w"(c)
3917            : /* No clobbers */);
3918   return result;
3919 }
3920 
3921 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vaba_u8(uint8x8_t a,uint8x8_t b,uint8x8_t c)3922 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
3923 {
3924   uint8x8_t result;
3925   __asm__ ("uaba %0.8b,%2.8b,%3.8b"
3926            : "=w"(result)
3927            : "0"(a), "w"(b), "w"(c)
3928            : /* No clobbers */);
3929   return result;
3930 }
3931 
3932 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vaba_u16(uint16x4_t a,uint16x4_t b,uint16x4_t c)3933 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
3934 {
3935   uint16x4_t result;
3936   __asm__ ("uaba %0.4h,%2.4h,%3.4h"
3937            : "=w"(result)
3938            : "0"(a), "w"(b), "w"(c)
3939            : /* No clobbers */);
3940   return result;
3941 }
3942 
3943 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vaba_u32(uint32x2_t a,uint32x2_t b,uint32x2_t c)3944 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
3945 {
3946   uint32x2_t result;
3947   __asm__ ("uaba %0.2s,%2.2s,%3.2s"
3948            : "=w"(result)
3949            : "0"(a), "w"(b), "w"(c)
3950            : /* No clobbers */);
3951   return result;
3952 }
3953 
3954 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabal_high_s8(int16x8_t a,int8x16_t b,int8x16_t c)3955 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
3956 {
3957   int16x8_t result;
3958   __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
3959            : "=w"(result)
3960            : "0"(a), "w"(b), "w"(c)
3961            : /* No clobbers */);
3962   return result;
3963 }
3964 
3965 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabal_high_s16(int32x4_t a,int16x8_t b,int16x8_t c)3966 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
3967 {
3968   int32x4_t result;
3969   __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
3970            : "=w"(result)
3971            : "0"(a), "w"(b), "w"(c)
3972            : /* No clobbers */);
3973   return result;
3974 }
3975 
3976 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabal_high_s32(int64x2_t a,int32x4_t b,int32x4_t c)3977 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
3978 {
3979   int64x2_t result;
3980   __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
3981            : "=w"(result)
3982            : "0"(a), "w"(b), "w"(c)
3983            : /* No clobbers */);
3984   return result;
3985 }
3986 
3987 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabal_high_u8(uint16x8_t a,uint8x16_t b,uint8x16_t c)3988 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
3989 {
3990   uint16x8_t result;
3991   __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
3992            : "=w"(result)
3993            : "0"(a), "w"(b), "w"(c)
3994            : /* No clobbers */);
3995   return result;
3996 }
3997 
3998 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabal_high_u16(uint32x4_t a,uint16x8_t b,uint16x8_t c)3999 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4000 {
4001   uint32x4_t result;
4002   __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4003            : "=w"(result)
4004            : "0"(a), "w"(b), "w"(c)
4005            : /* No clobbers */);
4006   return result;
4007 }
4008 
4009 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabal_high_u32(uint64x2_t a,uint32x4_t b,uint32x4_t c)4010 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4011 {
4012   uint64x2_t result;
4013   __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4014            : "=w"(result)
4015            : "0"(a), "w"(b), "w"(c)
4016            : /* No clobbers */);
4017   return result;
4018 }
4019 
4020 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabal_s8(int16x8_t a,int8x8_t b,int8x8_t c)4021 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4022 {
4023   int16x8_t result;
4024   __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4025            : "=w"(result)
4026            : "0"(a), "w"(b), "w"(c)
4027            : /* No clobbers */);
4028   return result;
4029 }
4030 
4031 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabal_s16(int32x4_t a,int16x4_t b,int16x4_t c)4032 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4033 {
4034   int32x4_t result;
4035   __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4036            : "=w"(result)
4037            : "0"(a), "w"(b), "w"(c)
4038            : /* No clobbers */);
4039   return result;
4040 }
4041 
4042 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabal_s32(int64x2_t a,int32x2_t b,int32x2_t c)4043 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4044 {
4045   int64x2_t result;
4046   __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4047            : "=w"(result)
4048            : "0"(a), "w"(b), "w"(c)
4049            : /* No clobbers */);
4050   return result;
4051 }
4052 
4053 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabal_u8(uint16x8_t a,uint8x8_t b,uint8x8_t c)4054 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4055 {
4056   uint16x8_t result;
4057   __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4058            : "=w"(result)
4059            : "0"(a), "w"(b), "w"(c)
4060            : /* No clobbers */);
4061   return result;
4062 }
4063 
4064 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabal_u16(uint32x4_t a,uint16x4_t b,uint16x4_t c)4065 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4066 {
4067   uint32x4_t result;
4068   __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4069            : "=w"(result)
4070            : "0"(a), "w"(b), "w"(c)
4071            : /* No clobbers */);
4072   return result;
4073 }
4074 
4075 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabal_u32(uint64x2_t a,uint32x2_t b,uint32x2_t c)4076 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4077 {
4078   uint64x2_t result;
4079   __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4080            : "=w"(result)
4081            : "0"(a), "w"(b), "w"(c)
4082            : /* No clobbers */);
4083   return result;
4084 }
4085 
4086 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabaq_s8(int8x16_t a,int8x16_t b,int8x16_t c)4087 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4088 {
4089   int8x16_t result;
4090   __asm__ ("saba %0.16b,%2.16b,%3.16b"
4091            : "=w"(result)
4092            : "0"(a), "w"(b), "w"(c)
4093            : /* No clobbers */);
4094   return result;
4095 }
4096 
4097 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabaq_s16(int16x8_t a,int16x8_t b,int16x8_t c)4098 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4099 {
4100   int16x8_t result;
4101   __asm__ ("saba %0.8h,%2.8h,%3.8h"
4102            : "=w"(result)
4103            : "0"(a), "w"(b), "w"(c)
4104            : /* No clobbers */);
4105   return result;
4106 }
4107 
4108 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabaq_s32(int32x4_t a,int32x4_t b,int32x4_t c)4109 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4110 {
4111   int32x4_t result;
4112   __asm__ ("saba %0.4s,%2.4s,%3.4s"
4113            : "=w"(result)
4114            : "0"(a), "w"(b), "w"(c)
4115            : /* No clobbers */);
4116   return result;
4117 }
4118 
4119 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vabaq_u8(uint8x16_t a,uint8x16_t b,uint8x16_t c)4120 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4121 {
4122   uint8x16_t result;
4123   __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4124            : "=w"(result)
4125            : "0"(a), "w"(b), "w"(c)
4126            : /* No clobbers */);
4127   return result;
4128 }
4129 
4130 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabaq_u16(uint16x8_t a,uint16x8_t b,uint16x8_t c)4131 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4132 {
4133   uint16x8_t result;
4134   __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4135            : "=w"(result)
4136            : "0"(a), "w"(b), "w"(c)
4137            : /* No clobbers */);
4138   return result;
4139 }
4140 
4141 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabaq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)4142 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4143 {
4144   uint32x4_t result;
4145   __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4146            : "=w"(result)
4147            : "0"(a), "w"(b), "w"(c)
4148            : /* No clobbers */);
4149   return result;
4150 }
4151 
4152 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vabd_f32(float32x2_t a,float32x2_t b)4153 vabd_f32 (float32x2_t a, float32x2_t b)
4154 {
4155   float32x2_t result;
4156   __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4157            : "=w"(result)
4158            : "w"(a), "w"(b)
4159            : /* No clobbers */);
4160   return result;
4161 }
4162 
4163 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vabd_s8(int8x8_t a,int8x8_t b)4164 vabd_s8 (int8x8_t a, int8x8_t b)
4165 {
4166   int8x8_t result;
4167   __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4168            : "=w"(result)
4169            : "w"(a), "w"(b)
4170            : /* No clobbers */);
4171   return result;
4172 }
4173 
4174 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vabd_s16(int16x4_t a,int16x4_t b)4175 vabd_s16 (int16x4_t a, int16x4_t b)
4176 {
4177   int16x4_t result;
4178   __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4179            : "=w"(result)
4180            : "w"(a), "w"(b)
4181            : /* No clobbers */);
4182   return result;
4183 }
4184 
4185 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vabd_s32(int32x2_t a,int32x2_t b)4186 vabd_s32 (int32x2_t a, int32x2_t b)
4187 {
4188   int32x2_t result;
4189   __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4190            : "=w"(result)
4191            : "w"(a), "w"(b)
4192            : /* No clobbers */);
4193   return result;
4194 }
4195 
4196 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vabd_u8(uint8x8_t a,uint8x8_t b)4197 vabd_u8 (uint8x8_t a, uint8x8_t b)
4198 {
4199   uint8x8_t result;
4200   __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4201            : "=w"(result)
4202            : "w"(a), "w"(b)
4203            : /* No clobbers */);
4204   return result;
4205 }
4206 
4207 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vabd_u16(uint16x4_t a,uint16x4_t b)4208 vabd_u16 (uint16x4_t a, uint16x4_t b)
4209 {
4210   uint16x4_t result;
4211   __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4212            : "=w"(result)
4213            : "w"(a), "w"(b)
4214            : /* No clobbers */);
4215   return result;
4216 }
4217 
4218 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vabd_u32(uint32x2_t a,uint32x2_t b)4219 vabd_u32 (uint32x2_t a, uint32x2_t b)
4220 {
4221   uint32x2_t result;
4222   __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4223            : "=w"(result)
4224            : "w"(a), "w"(b)
4225            : /* No clobbers */);
4226   return result;
4227 }
4228 
4229 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vabdd_f64(float64_t a,float64_t b)4230 vabdd_f64 (float64_t a, float64_t b)
4231 {
4232   float64_t result;
4233   __asm__ ("fabd %d0, %d1, %d2"
4234            : "=w"(result)
4235            : "w"(a), "w"(b)
4236            : /* No clobbers */);
4237   return result;
4238 }
4239 
4240 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabdl_high_s8(int8x16_t a,int8x16_t b)4241 vabdl_high_s8 (int8x16_t a, int8x16_t b)
4242 {
4243   int16x8_t result;
4244   __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4245            : "=w"(result)
4246            : "w"(a), "w"(b)
4247            : /* No clobbers */);
4248   return result;
4249 }
4250 
4251 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabdl_high_s16(int16x8_t a,int16x8_t b)4252 vabdl_high_s16 (int16x8_t a, int16x8_t b)
4253 {
4254   int32x4_t result;
4255   __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4256            : "=w"(result)
4257            : "w"(a), "w"(b)
4258            : /* No clobbers */);
4259   return result;
4260 }
4261 
4262 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabdl_high_s32(int32x4_t a,int32x4_t b)4263 vabdl_high_s32 (int32x4_t a, int32x4_t b)
4264 {
4265   int64x2_t result;
4266   __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4267            : "=w"(result)
4268            : "w"(a), "w"(b)
4269            : /* No clobbers */);
4270   return result;
4271 }
4272 
4273 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabdl_high_u8(uint8x16_t a,uint8x16_t b)4274 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
4275 {
4276   uint16x8_t result;
4277   __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4278            : "=w"(result)
4279            : "w"(a), "w"(b)
4280            : /* No clobbers */);
4281   return result;
4282 }
4283 
4284 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabdl_high_u16(uint16x8_t a,uint16x8_t b)4285 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
4286 {
4287   uint32x4_t result;
4288   __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4289            : "=w"(result)
4290            : "w"(a), "w"(b)
4291            : /* No clobbers */);
4292   return result;
4293 }
4294 
4295 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabdl_high_u32(uint32x4_t a,uint32x4_t b)4296 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
4297 {
4298   uint64x2_t result;
4299   __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4300            : "=w"(result)
4301            : "w"(a), "w"(b)
4302            : /* No clobbers */);
4303   return result;
4304 }
4305 
4306 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabdl_s8(int8x8_t a,int8x8_t b)4307 vabdl_s8 (int8x8_t a, int8x8_t b)
4308 {
4309   int16x8_t result;
4310   __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4311            : "=w"(result)
4312            : "w"(a), "w"(b)
4313            : /* No clobbers */);
4314   return result;
4315 }
4316 
4317 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabdl_s16(int16x4_t a,int16x4_t b)4318 vabdl_s16 (int16x4_t a, int16x4_t b)
4319 {
4320   int32x4_t result;
4321   __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4322            : "=w"(result)
4323            : "w"(a), "w"(b)
4324            : /* No clobbers */);
4325   return result;
4326 }
4327 
4328 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabdl_s32(int32x2_t a,int32x2_t b)4329 vabdl_s32 (int32x2_t a, int32x2_t b)
4330 {
4331   int64x2_t result;
4332   __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4333            : "=w"(result)
4334            : "w"(a), "w"(b)
4335            : /* No clobbers */);
4336   return result;
4337 }
4338 
4339 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabdl_u8(uint8x8_t a,uint8x8_t b)4340 vabdl_u8 (uint8x8_t a, uint8x8_t b)
4341 {
4342   uint16x8_t result;
4343   __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4344            : "=w"(result)
4345            : "w"(a), "w"(b)
4346            : /* No clobbers */);
4347   return result;
4348 }
4349 
4350 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabdl_u16(uint16x4_t a,uint16x4_t b)4351 vabdl_u16 (uint16x4_t a, uint16x4_t b)
4352 {
4353   uint32x4_t result;
4354   __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4355            : "=w"(result)
4356            : "w"(a), "w"(b)
4357            : /* No clobbers */);
4358   return result;
4359 }
4360 
4361 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vabdl_u32(uint32x2_t a,uint32x2_t b)4362 vabdl_u32 (uint32x2_t a, uint32x2_t b)
4363 {
4364   uint64x2_t result;
4365   __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4366            : "=w"(result)
4367            : "w"(a), "w"(b)
4368            : /* No clobbers */);
4369   return result;
4370 }
4371 
4372 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vabdq_f32(float32x4_t a,float32x4_t b)4373 vabdq_f32 (float32x4_t a, float32x4_t b)
4374 {
4375   float32x4_t result;
4376   __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4377            : "=w"(result)
4378            : "w"(a), "w"(b)
4379            : /* No clobbers */);
4380   return result;
4381 }
4382 
4383 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vabdq_f64(float64x2_t a,float64x2_t b)4384 vabdq_f64 (float64x2_t a, float64x2_t b)
4385 {
4386   float64x2_t result;
4387   __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4388            : "=w"(result)
4389            : "w"(a), "w"(b)
4390            : /* No clobbers */);
4391   return result;
4392 }
4393 
4394 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabdq_s8(int8x16_t a,int8x16_t b)4395 vabdq_s8 (int8x16_t a, int8x16_t b)
4396 {
4397   int8x16_t result;
4398   __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4399            : "=w"(result)
4400            : "w"(a), "w"(b)
4401            : /* No clobbers */);
4402   return result;
4403 }
4404 
4405 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabdq_s16(int16x8_t a,int16x8_t b)4406 vabdq_s16 (int16x8_t a, int16x8_t b)
4407 {
4408   int16x8_t result;
4409   __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4410            : "=w"(result)
4411            : "w"(a), "w"(b)
4412            : /* No clobbers */);
4413   return result;
4414 }
4415 
4416 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabdq_s32(int32x4_t a,int32x4_t b)4417 vabdq_s32 (int32x4_t a, int32x4_t b)
4418 {
4419   int32x4_t result;
4420   __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4421            : "=w"(result)
4422            : "w"(a), "w"(b)
4423            : /* No clobbers */);
4424   return result;
4425 }
4426 
4427 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vabdq_u8(uint8x16_t a,uint8x16_t b)4428 vabdq_u8 (uint8x16_t a, uint8x16_t b)
4429 {
4430   uint8x16_t result;
4431   __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4432            : "=w"(result)
4433            : "w"(a), "w"(b)
4434            : /* No clobbers */);
4435   return result;
4436 }
4437 
4438 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabdq_u16(uint16x8_t a,uint16x8_t b)4439 vabdq_u16 (uint16x8_t a, uint16x8_t b)
4440 {
4441   uint16x8_t result;
4442   __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4443            : "=w"(result)
4444            : "w"(a), "w"(b)
4445            : /* No clobbers */);
4446   return result;
4447 }
4448 
4449 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabdq_u32(uint32x4_t a,uint32x4_t b)4450 vabdq_u32 (uint32x4_t a, uint32x4_t b)
4451 {
4452   uint32x4_t result;
4453   __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4454            : "=w"(result)
4455            : "w"(a), "w"(b)
4456            : /* No clobbers */);
4457   return result;
4458 }
4459 
4460 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vabds_f32(float32_t a,float32_t b)4461 vabds_f32 (float32_t a, float32_t b)
4462 {
4463   float32_t result;
4464   __asm__ ("fabd %s0, %s1, %s2"
4465            : "=w"(result)
4466            : "w"(a), "w"(b)
4467            : /* No clobbers */);
4468   return result;
4469 }
4470 
4471 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vabs_f32(float32x2_t a)4472 vabs_f32 (float32x2_t a)
4473 {
4474   float32x2_t result;
4475   __asm__ ("fabs %0.2s,%1.2s"
4476            : "=w"(result)
4477            : "w"(a)
4478            : /* No clobbers */);
4479   return result;
4480 }
4481 
4482 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vabs_s8(int8x8_t a)4483 vabs_s8 (int8x8_t a)
4484 {
4485   int8x8_t result;
4486   __asm__ ("abs %0.8b,%1.8b"
4487            : "=w"(result)
4488            : "w"(a)
4489            : /* No clobbers */);
4490   return result;
4491 }
4492 
4493 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vabs_s16(int16x4_t a)4494 vabs_s16 (int16x4_t a)
4495 {
4496   int16x4_t result;
4497   __asm__ ("abs %0.4h,%1.4h"
4498            : "=w"(result)
4499            : "w"(a)
4500            : /* No clobbers */);
4501   return result;
4502 }
4503 
4504 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vabs_s32(int32x2_t a)4505 vabs_s32 (int32x2_t a)
4506 {
4507   int32x2_t result;
4508   __asm__ ("abs %0.2s,%1.2s"
4509            : "=w"(result)
4510            : "w"(a)
4511            : /* No clobbers */);
4512   return result;
4513 }
4514 
4515 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vabsq_f32(float32x4_t a)4516 vabsq_f32 (float32x4_t a)
4517 {
4518   float32x4_t result;
4519   __asm__ ("fabs %0.4s,%1.4s"
4520            : "=w"(result)
4521            : "w"(a)
4522            : /* No clobbers */);
4523   return result;
4524 }
4525 
4526 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vabsq_f64(float64x2_t a)4527 vabsq_f64 (float64x2_t a)
4528 {
4529   float64x2_t result;
4530   __asm__ ("fabs %0.2d,%1.2d"
4531            : "=w"(result)
4532            : "w"(a)
4533            : /* No clobbers */);
4534   return result;
4535 }
4536 
4537 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabsq_s8(int8x16_t a)4538 vabsq_s8 (int8x16_t a)
4539 {
4540   int8x16_t result;
4541   __asm__ ("abs %0.16b,%1.16b"
4542            : "=w"(result)
4543            : "w"(a)
4544            : /* No clobbers */);
4545   return result;
4546 }
4547 
4548 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabsq_s16(int16x8_t a)4549 vabsq_s16 (int16x8_t a)
4550 {
4551   int16x8_t result;
4552   __asm__ ("abs %0.8h,%1.8h"
4553            : "=w"(result)
4554            : "w"(a)
4555            : /* No clobbers */);
4556   return result;
4557 }
4558 
4559 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabsq_s32(int32x4_t a)4560 vabsq_s32 (int32x4_t a)
4561 {
4562   int32x4_t result;
4563   __asm__ ("abs %0.4s,%1.4s"
4564            : "=w"(result)
4565            : "w"(a)
4566            : /* No clobbers */);
4567   return result;
4568 }
4569 
4570 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabsq_s64(int64x2_t a)4571 vabsq_s64 (int64x2_t a)
4572 {
4573   int64x2_t result;
4574   __asm__ ("abs %0.2d,%1.2d"
4575            : "=w"(result)
4576            : "w"(a)
4577            : /* No clobbers */);
4578   return result;
4579 }
4580 
4581 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vacged_f64(float64_t a,float64_t b)4582 vacged_f64 (float64_t a, float64_t b)
4583 {
4584   float64_t result;
4585   __asm__ ("facge %d0,%d1,%d2"
4586            : "=w"(result)
4587            : "w"(a), "w"(b)
4588            : /* No clobbers */);
4589   return result;
4590 }
4591 
4592 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vacges_f32(float32_t a,float32_t b)4593 vacges_f32 (float32_t a, float32_t b)
4594 {
4595   float32_t result;
4596   __asm__ ("facge %s0,%s1,%s2"
4597            : "=w"(result)
4598            : "w"(a), "w"(b)
4599            : /* No clobbers */);
4600   return result;
4601 }
4602 
4603 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vacgtd_f64(float64_t a,float64_t b)4604 vacgtd_f64 (float64_t a, float64_t b)
4605 {
4606   float64_t result;
4607   __asm__ ("facgt %d0,%d1,%d2"
4608            : "=w"(result)
4609            : "w"(a), "w"(b)
4610            : /* No clobbers */);
4611   return result;
4612 }
4613 
4614 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vacgts_f32(float32_t a,float32_t b)4615 vacgts_f32 (float32_t a, float32_t b)
4616 {
4617   float32_t result;
4618   __asm__ ("facgt %s0,%s1,%s2"
4619            : "=w"(result)
4620            : "w"(a), "w"(b)
4621            : /* No clobbers */);
4622   return result;
4623 }
4624 
4625 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddlv_s8(int8x8_t a)4626 vaddlv_s8 (int8x8_t a)
4627 {
4628   int16_t result;
4629   __asm__ ("saddlv %h0,%1.8b"
4630            : "=w"(result)
4631            : "w"(a)
4632            : /* No clobbers */);
4633   return result;
4634 }
4635 
4636 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddlv_s16(int16x4_t a)4637 vaddlv_s16 (int16x4_t a)
4638 {
4639   int32_t result;
4640   __asm__ ("saddlv %s0,%1.4h"
4641            : "=w"(result)
4642            : "w"(a)
4643            : /* No clobbers */);
4644   return result;
4645 }
4646 
4647 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddlv_u8(uint8x8_t a)4648 vaddlv_u8 (uint8x8_t a)
4649 {
4650   uint16_t result;
4651   __asm__ ("uaddlv %h0,%1.8b"
4652            : "=w"(result)
4653            : "w"(a)
4654            : /* No clobbers */);
4655   return result;
4656 }
4657 
4658 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddlv_u16(uint16x4_t a)4659 vaddlv_u16 (uint16x4_t a)
4660 {
4661   uint32_t result;
4662   __asm__ ("uaddlv %s0,%1.4h"
4663            : "=w"(result)
4664            : "w"(a)
4665            : /* No clobbers */);
4666   return result;
4667 }
4668 
4669 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddlvq_s8(int8x16_t a)4670 vaddlvq_s8 (int8x16_t a)
4671 {
4672   int16_t result;
4673   __asm__ ("saddlv %h0,%1.16b"
4674            : "=w"(result)
4675            : "w"(a)
4676            : /* No clobbers */);
4677   return result;
4678 }
4679 
4680 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddlvq_s16(int16x8_t a)4681 vaddlvq_s16 (int16x8_t a)
4682 {
4683   int32_t result;
4684   __asm__ ("saddlv %s0,%1.8h"
4685            : "=w"(result)
4686            : "w"(a)
4687            : /* No clobbers */);
4688   return result;
4689 }
4690 
4691 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddlvq_s32(int32x4_t a)4692 vaddlvq_s32 (int32x4_t a)
4693 {
4694   int64_t result;
4695   __asm__ ("saddlv %d0,%1.4s"
4696            : "=w"(result)
4697            : "w"(a)
4698            : /* No clobbers */);
4699   return result;
4700 }
4701 
4702 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddlvq_u8(uint8x16_t a)4703 vaddlvq_u8 (uint8x16_t a)
4704 {
4705   uint16_t result;
4706   __asm__ ("uaddlv %h0,%1.16b"
4707            : "=w"(result)
4708            : "w"(a)
4709            : /* No clobbers */);
4710   return result;
4711 }
4712 
4713 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddlvq_u16(uint16x8_t a)4714 vaddlvq_u16 (uint16x8_t a)
4715 {
4716   uint32_t result;
4717   __asm__ ("uaddlv %s0,%1.8h"
4718            : "=w"(result)
4719            : "w"(a)
4720            : /* No clobbers */);
4721   return result;
4722 }
4723 
4724 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddlvq_u32(uint32x4_t a)4725 vaddlvq_u32 (uint32x4_t a)
4726 {
4727   uint64_t result;
4728   __asm__ ("uaddlv %d0,%1.4s"
4729            : "=w"(result)
4730            : "w"(a)
4731            : /* No clobbers */);
4732   return result;
4733 }
4734 
4735 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddv_s8(int8x8_t a)4736 vaddv_s8 (int8x8_t a)
4737 {
4738   int8_t result;
4739   __asm__ ("addv %b0,%1.8b"
4740            : "=w"(result)
4741            : "w"(a)
4742            : /* No clobbers */);
4743   return result;
4744 }
4745 
4746 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddv_s16(int16x4_t a)4747 vaddv_s16 (int16x4_t a)
4748 {
4749   int16_t result;
4750   __asm__ ("addv %h0,%1.4h"
4751            : "=w"(result)
4752            : "w"(a)
4753            : /* No clobbers */);
4754   return result;
4755 }
4756 
4757 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vaddv_u8(uint8x8_t a)4758 vaddv_u8 (uint8x8_t a)
4759 {
4760   uint8_t result;
4761   __asm__ ("addv %b0,%1.8b"
4762            : "=w"(result)
4763            : "w"(a)
4764            : /* No clobbers */);
4765   return result;
4766 }
4767 
4768 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddv_u16(uint16x4_t a)4769 vaddv_u16 (uint16x4_t a)
4770 {
4771   uint16_t result;
4772   __asm__ ("addv %h0,%1.4h"
4773            : "=w"(result)
4774            : "w"(a)
4775            : /* No clobbers */);
4776   return result;
4777 }
4778 
4779 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddvq_s8(int8x16_t a)4780 vaddvq_s8 (int8x16_t a)
4781 {
4782   int8_t result;
4783   __asm__ ("addv %b0,%1.16b"
4784            : "=w"(result)
4785            : "w"(a)
4786            : /* No clobbers */);
4787   return result;
4788 }
4789 
4790 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddvq_s16(int16x8_t a)4791 vaddvq_s16 (int16x8_t a)
4792 {
4793   int16_t result;
4794   __asm__ ("addv %h0,%1.8h"
4795            : "=w"(result)
4796            : "w"(a)
4797            : /* No clobbers */);
4798   return result;
4799 }
4800 
4801 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddvq_s32(int32x4_t a)4802 vaddvq_s32 (int32x4_t a)
4803 {
4804   int32_t result;
4805   __asm__ ("addv %s0,%1.4s"
4806            : "=w"(result)
4807            : "w"(a)
4808            : /* No clobbers */);
4809   return result;
4810 }
4811 
4812 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vaddvq_u8(uint8x16_t a)4813 vaddvq_u8 (uint8x16_t a)
4814 {
4815   uint8_t result;
4816   __asm__ ("addv %b0,%1.16b"
4817            : "=w"(result)
4818            : "w"(a)
4819            : /* No clobbers */);
4820   return result;
4821 }
4822 
4823 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddvq_u16(uint16x8_t a)4824 vaddvq_u16 (uint16x8_t a)
4825 {
4826   uint16_t result;
4827   __asm__ ("addv %h0,%1.8h"
4828            : "=w"(result)
4829            : "w"(a)
4830            : /* No clobbers */);
4831   return result;
4832 }
4833 
4834 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddvq_u32(uint32x4_t a)4835 vaddvq_u32 (uint32x4_t a)
4836 {
4837   uint32_t result;
4838   __asm__ ("addv %s0,%1.4s"
4839            : "=w"(result)
4840            : "w"(a)
4841            : /* No clobbers */);
4842   return result;
4843 }
4844 
4845 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vbsl_f32(uint32x2_t a,float32x2_t b,float32x2_t c)4846 vbsl_f32 (uint32x2_t a, float32x2_t b, float32x2_t c)
4847 {
4848   float32x2_t result;
4849   __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4850            : "=w"(result)
4851            : "0"(a), "w"(b), "w"(c)
4852            : /* No clobbers */);
4853   return result;
4854 }
4855 
4856 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vbsl_p8(uint8x8_t a,poly8x8_t b,poly8x8_t c)4857 vbsl_p8 (uint8x8_t a, poly8x8_t b, poly8x8_t c)
4858 {
4859   poly8x8_t result;
4860   __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4861            : "=w"(result)
4862            : "0"(a), "w"(b), "w"(c)
4863            : /* No clobbers */);
4864   return result;
4865 }
4866 
4867 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vbsl_p16(uint16x4_t a,poly16x4_t b,poly16x4_t c)4868 vbsl_p16 (uint16x4_t a, poly16x4_t b, poly16x4_t c)
4869 {
4870   poly16x4_t result;
4871   __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4872            : "=w"(result)
4873            : "0"(a), "w"(b), "w"(c)
4874            : /* No clobbers */);
4875   return result;
4876 }
4877 
4878 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vbsl_s8(uint8x8_t a,int8x8_t b,int8x8_t c)4879 vbsl_s8 (uint8x8_t a, int8x8_t b, int8x8_t c)
4880 {
4881   int8x8_t result;
4882   __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4883            : "=w"(result)
4884            : "0"(a), "w"(b), "w"(c)
4885            : /* No clobbers */);
4886   return result;
4887 }
4888 
4889 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vbsl_s16(uint16x4_t a,int16x4_t b,int16x4_t c)4890 vbsl_s16 (uint16x4_t a, int16x4_t b, int16x4_t c)
4891 {
4892   int16x4_t result;
4893   __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4894            : "=w"(result)
4895            : "0"(a), "w"(b), "w"(c)
4896            : /* No clobbers */);
4897   return result;
4898 }
4899 
4900 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vbsl_s32(uint32x2_t a,int32x2_t b,int32x2_t c)4901 vbsl_s32 (uint32x2_t a, int32x2_t b, int32x2_t c)
4902 {
4903   int32x2_t result;
4904   __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4905            : "=w"(result)
4906            : "0"(a), "w"(b), "w"(c)
4907            : /* No clobbers */);
4908   return result;
4909 }
4910 
4911 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vbsl_s64(uint64x1_t a,int64x1_t b,int64x1_t c)4912 vbsl_s64 (uint64x1_t a, int64x1_t b, int64x1_t c)
4913 {
4914   int64x1_t result;
4915   __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4916            : "=w"(result)
4917            : "0"(a), "w"(b), "w"(c)
4918            : /* No clobbers */);
4919   return result;
4920 }
4921 
4922 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vbsl_u8(uint8x8_t a,uint8x8_t b,uint8x8_t c)4923 vbsl_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4924 {
4925   uint8x8_t result;
4926   __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4927            : "=w"(result)
4928            : "0"(a), "w"(b), "w"(c)
4929            : /* No clobbers */);
4930   return result;
4931 }
4932 
4933 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vbsl_u16(uint16x4_t a,uint16x4_t b,uint16x4_t c)4934 vbsl_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4935 {
4936   uint16x4_t result;
4937   __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4938            : "=w"(result)
4939            : "0"(a), "w"(b), "w"(c)
4940            : /* No clobbers */);
4941   return result;
4942 }
4943 
4944 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vbsl_u32(uint32x2_t a,uint32x2_t b,uint32x2_t c)4945 vbsl_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4946 {
4947   uint32x2_t result;
4948   __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4949            : "=w"(result)
4950            : "0"(a), "w"(b), "w"(c)
4951            : /* No clobbers */);
4952   return result;
4953 }
4954 
4955 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vbsl_u64(uint64x1_t a,uint64x1_t b,uint64x1_t c)4956 vbsl_u64 (uint64x1_t a, uint64x1_t b, uint64x1_t c)
4957 {
4958   uint64x1_t result;
4959   __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4960            : "=w"(result)
4961            : "0"(a), "w"(b), "w"(c)
4962            : /* No clobbers */);
4963   return result;
4964 }
4965 
4966 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vbslq_f32(uint32x4_t a,float32x4_t b,float32x4_t c)4967 vbslq_f32 (uint32x4_t a, float32x4_t b, float32x4_t c)
4968 {
4969   float32x4_t result;
4970   __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4971            : "=w"(result)
4972            : "0"(a), "w"(b), "w"(c)
4973            : /* No clobbers */);
4974   return result;
4975 }
4976 
4977 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vbslq_f64(uint64x2_t a,float64x2_t b,float64x2_t c)4978 vbslq_f64 (uint64x2_t a, float64x2_t b, float64x2_t c)
4979 {
4980   float64x2_t result;
4981   __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4982            : "=w"(result)
4983            : "0"(a), "w"(b), "w"(c)
4984            : /* No clobbers */);
4985   return result;
4986 }
4987 
4988 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vbslq_p8(uint8x16_t a,poly8x16_t b,poly8x16_t c)4989 vbslq_p8 (uint8x16_t a, poly8x16_t b, poly8x16_t c)
4990 {
4991   poly8x16_t result;
4992   __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4993            : "=w"(result)
4994            : "0"(a), "w"(b), "w"(c)
4995            : /* No clobbers */);
4996   return result;
4997 }
4998 
4999 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vbslq_p16(uint16x8_t a,poly16x8_t b,poly16x8_t c)5000 vbslq_p16 (uint16x8_t a, poly16x8_t b, poly16x8_t c)
5001 {
5002   poly16x8_t result;
5003   __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5004            : "=w"(result)
5005            : "0"(a), "w"(b), "w"(c)
5006            : /* No clobbers */);
5007   return result;
5008 }
5009 
5010 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vbslq_s8(uint8x16_t a,int8x16_t b,int8x16_t c)5011 vbslq_s8 (uint8x16_t a, int8x16_t b, int8x16_t c)
5012 {
5013   int8x16_t result;
5014   __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5015            : "=w"(result)
5016            : "0"(a), "w"(b), "w"(c)
5017            : /* No clobbers */);
5018   return result;
5019 }
5020 
5021 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vbslq_s16(uint16x8_t a,int16x8_t b,int16x8_t c)5022 vbslq_s16 (uint16x8_t a, int16x8_t b, int16x8_t c)
5023 {
5024   int16x8_t result;
5025   __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5026            : "=w"(result)
5027            : "0"(a), "w"(b), "w"(c)
5028            : /* No clobbers */);
5029   return result;
5030 }
5031 
5032 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vbslq_s32(uint32x4_t a,int32x4_t b,int32x4_t c)5033 vbslq_s32 (uint32x4_t a, int32x4_t b, int32x4_t c)
5034 {
5035   int32x4_t result;
5036   __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5037            : "=w"(result)
5038            : "0"(a), "w"(b), "w"(c)
5039            : /* No clobbers */);
5040   return result;
5041 }
5042 
5043 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vbslq_s64(uint64x2_t a,int64x2_t b,int64x2_t c)5044 vbslq_s64 (uint64x2_t a, int64x2_t b, int64x2_t c)
5045 {
5046   int64x2_t result;
5047   __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5048            : "=w"(result)
5049            : "0"(a), "w"(b), "w"(c)
5050            : /* No clobbers */);
5051   return result;
5052 }
5053 
5054 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vbslq_u8(uint8x16_t a,uint8x16_t b,uint8x16_t c)5055 vbslq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
5056 {
5057   uint8x16_t result;
5058   __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5059            : "=w"(result)
5060            : "0"(a), "w"(b), "w"(c)
5061            : /* No clobbers */);
5062   return result;
5063 }
5064 
5065 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vbslq_u16(uint16x8_t a,uint16x8_t b,uint16x8_t c)5066 vbslq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
5067 {
5068   uint16x8_t result;
5069   __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5070            : "=w"(result)
5071            : "0"(a), "w"(b), "w"(c)
5072            : /* No clobbers */);
5073   return result;
5074 }
5075 
5076 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vbslq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)5077 vbslq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
5078 {
5079   uint32x4_t result;
5080   __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5081            : "=w"(result)
5082            : "0"(a), "w"(b), "w"(c)
5083            : /* No clobbers */);
5084   return result;
5085 }
5086 
5087 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vbslq_u64(uint64x2_t a,uint64x2_t b,uint64x2_t c)5088 vbslq_u64 (uint64x2_t a, uint64x2_t b, uint64x2_t c)
5089 {
5090   uint64x2_t result;
5091   __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5092            : "=w"(result)
5093            : "0"(a), "w"(b), "w"(c)
5094            : /* No clobbers */);
5095   return result;
5096 }
5097 
5098 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcage_f32(float32x2_t a,float32x2_t b)5099 vcage_f32 (float32x2_t a, float32x2_t b)
5100 {
5101   uint32x2_t result;
5102   __asm__ ("facge %0.2s, %1.2s, %2.2s"
5103            : "=w"(result)
5104            : "w"(a), "w"(b)
5105            : /* No clobbers */);
5106   return result;
5107 }
5108 
/* Floating-point absolute-value compare intrinsics.  FACGE/FACGT compare
   the absolute values of corresponding lanes: each result lane is
   all-ones when the comparison holds and all-zeros otherwise.  The
   "le"/"lt" variants reuse FACGE/FACGT with the two source operands
   swapped in the asm template (%2 before %1).  */

/* |a| >= |b|, 4 x float32.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcageq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("facge %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* |a| >= |b|, 2 x float64.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcageq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("facge %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* |a| > |b|, 2 x float32.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcagt_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("facgt %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* |a| > |b|, 4 x float32.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcagtq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("facgt %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* |a| > |b|, 2 x float64.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcagtq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("facgt %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* |a| <= |b|, 2 x float32 (FACGE, operands swapped).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcale_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("facge %0.2s, %2.2s, %1.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* |a| <= |b|, 4 x float32 (FACGE, operands swapped).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcaleq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("facge %0.4s, %2.4s, %1.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* |a| <= |b|, 2 x float64 (FACGE, operands swapped).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcaleq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("facge %0.2d, %2.2d, %1.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* |a| < |b|, 2 x float32 (FACGT, operands swapped).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcalt_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("facgt %0.2s, %2.2s, %1.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* |a| < |b|, 4 x float32 (FACGT, operands swapped).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcaltq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("facgt %0.4s, %2.4s, %1.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* |a| < |b|, 2 x float64 (FACGT, operands swapped).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcaltq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("facgt %0.2d, %2.2d, %1.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
5229 
/* Floating-point equality compare intrinsics (FCMEQ).  Each result lane
   is all-ones when the corresponding lanes compare equal, all-zeros
   otherwise.  The "z" variants compare against immediate zero.  */

/* a == b, 2 x float32.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("fcmeq %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a == b, 1 x float64.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_f64 (float64x1_t a, float64x1_t b)
{
  uint64x1_t result;
  __asm__ ("fcmeq %d0, %d1, %d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Scalar a == b.  NOTE(review): ACLE specifies a uint64_t return for
   vceqd_f64; this header returns the FCMEQ mask bit-pattern in a
   float64_t — confirm before relying on the return type.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vceqd_f64 (float64_t a, float64_t b)
{
  float64_t result;
  __asm__ ("fcmeq %d0,%d1,%d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a == b, 4 x float32.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("fcmeq %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a == b, 2 x float64.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("fcmeq %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Scalar a == b.  NOTE(review): ACLE specifies uint32_t here; see
   vceqd_f64 above — mask is returned in a float32_t.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vceqs_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("fcmeq %s0,%s1,%s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Scalar a == 0.0 (immediate zero form).  NOTE(review): mask returned
   in a float64_t, as with vceqd_f64.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vceqzd_f64 (float64_t a)
{
  float64_t result;
  __asm__ ("fcmeq %d0,%d1,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Scalar a == 0.0 (immediate zero form).  NOTE(review): mask returned
   in a float32_t, as with vceqs_f32.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vceqzs_f32 (float32_t a)
{
  float32_t result;
  __asm__ ("fcmeq %s0,%s1,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
5317 
/* Floating-point ordered compare intrinsics (FCMGE/FCMGT).  Each result
   lane is all-ones when the comparison holds, all-zeros otherwise.  The
   "le" variants reuse FCMGE with the operands swapped (%2 before %1).  */

/* a >= b, 2 x float32.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("fcmge %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a >= b, 1 x float64.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_f64 (float64x1_t a, float64x1_t b)
{
  uint64x1_t result;
  __asm__ ("fcmge %d0, %d1, %d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a >= b, 4 x float32.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("fcmge %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a >= b, 2 x float64.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("fcmge %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a > b, 2 x float32.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("fcmgt %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a > b, 1 x float64.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_f64 (float64x1_t a, float64x1_t b)
{
  uint64x1_t result;
  __asm__ ("fcmgt %d0, %d1, %d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a > b, 4 x float32.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("fcmgt %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a > b, 2 x float64.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("fcmgt %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a <= b, 2 x float32 (FCMGE, operands swapped).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("fcmge %0.2s, %2.2s, %1.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a <= b, 1 x float64 (FCMGE, operands swapped).  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_f64 (float64x1_t a, float64x1_t b)
{
  uint64x1_t result;
  __asm__ ("fcmge %d0, %d2, %d1"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a <= b, 4 x float32 (FCMGE, operands swapped).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("fcmge %0.4s, %2.4s, %1.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a <= b, 2 x float64 (FCMGE, operands swapped).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("fcmge %0.2d, %2.2d, %1.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
5449 
/* Count-leading-sign-bits intrinsics (CLS), per lane.  */

/* 8 x int8.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcls_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("cls %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 4 x int16.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcls_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("cls %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x int32.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcls_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("cls %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 16 x int8.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclsq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("cls %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 8 x int16.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclsq_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("cls %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 4 x int32.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclsq_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("cls %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
5515 
/* Floating-point less-than intrinsics: FCMGT with the source operands
   swapped in the asm template (%2 before %1).  Each result lane is
   all-ones when a < b, all-zeros otherwise.  */

/* a < b, 2 x float32.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("fcmgt %0.2s, %2.2s, %1.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a < b, 1 x float64.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_f64 (float64x1_t a, float64x1_t b)
{
  uint64x1_t result;
  __asm__ ("fcmgt %d0, %d2, %d1"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a < b, 4 x float32.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("fcmgt %0.4s, %2.4s, %1.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* a < b, 2 x float64.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("fcmgt %0.2d, %2.2d, %1.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
5559 
/* Count-leading-zeros intrinsics (CLZ), per lane.  Signed and unsigned
   variants emit the same instruction; only the C-level types differ.  */

/* 8 x int8.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vclz_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("clz %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 4 x int16.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vclz_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("clz %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x int32.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vclz_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("clz %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 8 x uint8.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclz_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("clz %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 4 x uint16.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclz_u16 (uint16x4_t a)
{
  uint16x4_t result;
  __asm__ ("clz %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x uint32.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclz_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("clz %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 16 x int8.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclzq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("clz %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 8 x int16.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclzq_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("clz %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 4 x int32.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclzq_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("clz %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 16 x uint8.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclzq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("clz %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 8 x uint16.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclzq_u16 (uint16x8_t a)
{
  uint16x8_t result;
  __asm__ ("clz %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 4 x uint32.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclzq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("clz %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
5691 
/* Population-count intrinsics (CNT): count of set bits in each byte
   lane.  poly/signed/unsigned variants emit the same instruction; only
   the C-level types differ.  */

/* 8 x poly8.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcnt_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 8 x int8.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcnt_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 8 x uint8.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcnt_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 16 x poly8.  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcntq_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 16 x int8.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcntq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 16 x uint8.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcntq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
5757 
/* vcopyq_lane_<type> (a, b, c, d): copy lane d of vector c into lane b
   of vector a, returning the modified copy of a.  Implemented as the
   INS (insert element) instruction; b and d must be compile-time
   constants ("i" constraints), and the "0" constraint ties the input
   vector a_ to the output register so the untouched lanes pass through.
   These are macros rather than functions so the lane indices can be
   substituted as immediates into the asm template.  */

#define vcopyq_lane_f32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t c_ = (c);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_f64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t c_ = (c);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t c_ = (c);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t c_ = (c);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t c_ = (c);                                              \
       int8x16_t a_ = (a);                                              \
       int8x16_t result;                                                \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t c_ = (c);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t c_ = (c);                                             \
       uint8x16_t a_ = (a);                                             \
       uint8x16_t result;                                               \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t a_ = (a);                                             \
       uint16x8_t result;                                               \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t c_ = (c);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
5913 
5914 /* vcvt_f16_f32 not supported */
5915 
5916 /* vcvt_f32_f16 not supported */
5917 
/* Narrow 2 x float64 to 2 x float32 (FCVTN, round to nearest even).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_f64 (float64x2_t a)
{
  float32x2_t result;
  __asm__ ("fcvtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Signed int32 -> float32, per lane (SCVTF).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_s32 (int32x2_t a)
{
  float32x2_t result;
  __asm__ ("scvtf %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Unsigned int32 -> float32, per lane (UCVTF).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_u32 (uint32x2_t a)
{
  float32x2_t result;
  __asm__ ("ucvtf %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Widen 2 x float32 to 2 x float64 (FCVTL, exact).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_f64_f32 (float32x2_t a)
{
  float64x2_t result;
  __asm__ ("fcvtl %0.2d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
5961 
5962 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vcvt_f64_s64(uint64x1_t a)5963 vcvt_f64_s64 (uint64x1_t a)
5964 {
5965   float64x1_t result;
5966   __asm__ ("scvtf %d0, %d1"
5967            : "=w"(result)
5968            : "w"(a)
5969            : /* No clobbers */);
5970   return result;
5971 }
5972 
/* Unsigned int64 -> float64 (UCVTF).  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vcvt_f64_u64 (uint64x1_t a)
{
  float64x1_t result;
  __asm__ ("ucvtf %d0, %d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
5983 
5984 /* vcvt_high_f16_f32 not supported */
5985 
5986 /* vcvt_high_f32_f16 not supported */
5987 
/* Forward declaration: vdup_n_f32 is defined later in this header but
   is needed by vcvt_high_f32_f64 below.  */
static float32x2_t vdup_n_f32 (float32_t);

/* Narrow b (2 x float64) into the high half of the result while keeping
   a in the low half (FCVTN2).  result is pre-built with a in the low
   lanes and zeros in the high lanes; "+w" makes it an in/out operand
   (indices 0 and 1), so b is asm operand %2.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_high_f32_f64 (float32x2_t a, float64x2_t b)
{
  float32x4_t result = vcombine_f32 (a, vdup_n_f32 (0.0f));
  __asm__ ("fcvtn2 %0.4s,%2.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* Widen the high half of a (lanes 2-3) to 2 x float64 (FCVTL2).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_high_f64_f32 (float32x4_t a)
{
  float64x2_t result;
  __asm__ ("fcvtl2 %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6011 
/* Fixed-point <-> float conversions with B fractional bits, 2-lane
   forms.  B must be a compile-time constant (the "i" constraint);
   FCVTZ* rounds toward zero.  Implemented as macros so B can be
   emitted as an immediate.  */

/* 2 x signed 32-bit fixed-point (B fractional bits) -> float32.  */
#define vcvt_n_f32_s32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t a_ = (a);                                              \
       float32x2_t result;                                              \
       __asm__ ("scvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 2 x unsigned 32-bit fixed-point (B fractional bits) -> float32.  */
#define vcvt_n_f32_u32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t a_ = (a);                                             \
       float32x2_t result;                                              \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 2 x float32 -> signed 32-bit fixed-point with B fractional bits.  */
#define vcvt_n_s32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t a_ = (a);                                            \
       int32x2_t result;                                                \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 2 x float32 -> unsigned 32-bit fixed-point with B fractional bits.  */
#define vcvt_n_u32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t a_ = (a);                                            \
       uint32x2_t result;                                               \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6059 
/* 2 x float32 -> signed 32-bit, round toward zero (FCVTZS).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_s32_f32 (float32x2_t a)
{
  int32x2_t result;
  __asm__ ("fcvtzs %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float32 -> unsigned 32-bit, round toward zero (FCVTZU).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvt_u32_f32 (float32x2_t a)
{
  uint32x2_t result;
  __asm__ ("fcvtzu %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float32 -> signed 32-bit, round to nearest, ties away from zero
   (FCVTAS).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvta_s32_f32 (float32x2_t a)
{
  int32x2_t result;
  __asm__ ("fcvtas %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float32 -> unsigned 32-bit, round to nearest, ties away from zero
   (FCVTAU).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvta_u32_f32 (float32x2_t a)
{
  uint32x2_t result;
  __asm__ ("fcvtau %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6103 
6104 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtad_s64_f64(float64_t a)6105 vcvtad_s64_f64 (float64_t a)
6106 {
6107   float64_t result;
6108   __asm__ ("fcvtas %d0,%d1"
6109            : "=w"(result)
6110            : "w"(a)
6111            : /* No clobbers */);
6112   return result;
6113 }
6114 
6115 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtad_u64_f64(float64_t a)6116 vcvtad_u64_f64 (float64_t a)
6117 {
6118   float64_t result;
6119   __asm__ ("fcvtau %d0,%d1"
6120            : "=w"(result)
6121            : "w"(a)
6122            : /* No clobbers */);
6123   return result;
6124 }
6125 
/* 4 x float32 -> signed 32-bit, round to nearest, ties away (FCVTAS).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtaq_s32_f32 (float32x4_t a)
{
  int32x4_t result;
  __asm__ ("fcvtas %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float64 -> signed 64-bit, round to nearest, ties away (FCVTAS).  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtaq_s64_f64 (float64x2_t a)
{
  int64x2_t result;
  __asm__ ("fcvtas %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 4 x float32 -> unsigned 32-bit, round to nearest, ties away (FCVTAU).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtaq_u32_f32 (float32x4_t a)
{
  uint32x4_t result;
  __asm__ ("fcvtau %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float64 -> unsigned 64-bit, round to nearest, ties away (FCVTAU).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtaq_u64_f64 (float64x2_t a)
{
  uint64x2_t result;
  __asm__ ("fcvtau %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6169 
6170 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtas_s64_f64(float32_t a)6171 vcvtas_s64_f64 (float32_t a)
6172 {
6173   float32_t result;
6174   __asm__ ("fcvtas %s0,%s1"
6175            : "=w"(result)
6176            : "w"(a)
6177            : /* No clobbers */);
6178   return result;
6179 }
6180 
6181 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtas_u64_f64(float32_t a)6182 vcvtas_u64_f64 (float32_t a)
6183 {
6184   float32_t result;
6185   __asm__ ("fcvtau %s0,%s1"
6186            : "=w"(result)
6187            : "w"(a)
6188            : /* No clobbers */);
6189   return result;
6190 }
6191 
6192 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtd_f64_s64(int64_t a)6193 vcvtd_f64_s64 (int64_t a)
6194 {
6195   int64_t result;
6196   __asm__ ("scvtf %d0,%d1"
6197            : "=w"(result)
6198            : "w"(a)
6199            : /* No clobbers */);
6200   return result;
6201 }
6202 
6203 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtd_f64_u64(uint64_t a)6204 vcvtd_f64_u64 (uint64_t a)
6205 {
6206   uint64_t result;
6207   __asm__ ("ucvtf %d0,%d1"
6208            : "=w"(result)
6209            : "w"(a)
6210            : /* No clobbers */);
6211   return result;
6212 }
6213 
/* Scalar signed 64-bit fixed-point (B fractional bits) -> float64
   (SCVTF Dd,Dn,#fbits); B must be a compile-time constant.
   Fix: result was declared int64_t although SCVTF writes a double,
   reinterpreting the float64 bits as an integer; now float64_t.  */
#define vcvtd_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64_t a_ = (a);                                                \
       float64_t result;                                                \
       __asm__ ("scvtf %d0,%d1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6225 
/* Scalar unsigned 64-bit fixed-point (B fractional bits) -> float64
   (UCVTF Dd,Dn,#fbits); B must be a compile-time constant.
   Fix: result was declared uint64_t although UCVTF writes a double,
   reinterpreting the float64 bits as an integer; now float64_t.  */
#define vcvtd_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64_t a_ = (a);                                               \
       float64_t result;                                                \
       __asm__ ("ucvtf %d0,%d1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6237 
/* Scalar float64 -> signed 64-bit fixed-point with B fractional bits
   (FCVTZS Dd,Dn,#fbits, round toward zero); B must be constant.
   Fix: result was declared float64_t although FCVTZS produces an
   integer, reinterpreting the integer bits as a double; now int64_t.  */
#define vcvtd_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t a_ = (a);                                              \
       int64_t result;                                                  \
       __asm__ ("fcvtzs %d0,%d1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6249 
/* Scalar float64 -> unsigned 64-bit fixed-point with B fractional bits
   (FCVTZU Dd,Dn,#fbits, round toward zero); B must be constant.
   Fix: result was declared float64_t although FCVTZU produces an
   integer, reinterpreting the integer bits as a double; now uint64_t.  */
#define vcvtd_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t a_ = (a);                                              \
       uint64_t result;                                                 \
       __asm__ ("fcvtzu %d0,%d1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6261 
6262 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtd_s64_f64(float64_t a)6263 vcvtd_s64_f64 (float64_t a)
6264 {
6265   float64_t result;
6266   __asm__ ("fcvtzs %d0,%d1"
6267            : "=w"(result)
6268            : "w"(a)
6269            : /* No clobbers */);
6270   return result;
6271 }
6272 
6273 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtd_u64_f64(float64_t a)6274 vcvtd_u64_f64 (float64_t a)
6275 {
6276   float64_t result;
6277   __asm__ ("fcvtzu %d0,%d1"
6278            : "=w"(result)
6279            : "w"(a)
6280            : /* No clobbers */);
6281   return result;
6282 }
6283 
/* 2 x float32 -> signed 32-bit, round toward minus infinity (FCVTMS).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtm_s32_f32 (float32x2_t a)
{
  int32x2_t result;
  __asm__ ("fcvtms %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float32 -> unsigned 32-bit, round toward minus infinity (FCVTMU).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtm_u32_f32 (float32x2_t a)
{
  uint32x2_t result;
  __asm__ ("fcvtmu %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6305 
6306 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtmd_s64_f64(float64_t a)6307 vcvtmd_s64_f64 (float64_t a)
6308 {
6309   float64_t result;
6310   __asm__ ("fcvtms %d0,%d1"
6311            : "=w"(result)
6312            : "w"(a)
6313            : /* No clobbers */);
6314   return result;
6315 }
6316 
6317 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtmd_u64_f64(float64_t a)6318 vcvtmd_u64_f64 (float64_t a)
6319 {
6320   float64_t result;
6321   __asm__ ("fcvtmu %d0,%d1"
6322            : "=w"(result)
6323            : "w"(a)
6324            : /* No clobbers */);
6325   return result;
6326 }
6327 
/* 4 x float32 -> signed 32-bit, round toward minus infinity (FCVTMS).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtmq_s32_f32 (float32x4_t a)
{
  int32x4_t result;
  __asm__ ("fcvtms %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float64 -> signed 64-bit, round toward minus infinity (FCVTMS).  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtmq_s64_f64 (float64x2_t a)
{
  int64x2_t result;
  __asm__ ("fcvtms %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 4 x float32 -> unsigned 32-bit, round toward minus infinity (FCVTMU).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtmq_u32_f32 (float32x4_t a)
{
  uint32x4_t result;
  __asm__ ("fcvtmu %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float64 -> unsigned 64-bit, round toward minus infinity (FCVTMU).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtmq_u64_f64 (float64x2_t a)
{
  uint64x2_t result;
  __asm__ ("fcvtmu %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6371 
6372 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtms_s64_f64(float32_t a)6373 vcvtms_s64_f64 (float32_t a)
6374 {
6375   float32_t result;
6376   __asm__ ("fcvtms %s0,%s1"
6377            : "=w"(result)
6378            : "w"(a)
6379            : /* No clobbers */);
6380   return result;
6381 }
6382 
6383 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtms_u64_f64(float32_t a)6384 vcvtms_u64_f64 (float32_t a)
6385 {
6386   float32_t result;
6387   __asm__ ("fcvtmu %s0,%s1"
6388            : "=w"(result)
6389            : "w"(a)
6390            : /* No clobbers */);
6391   return result;
6392 }
6393 
/* 2 x float32 -> signed 32-bit, round to nearest, ties to even (FCVTNS).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtn_s32_f32 (float32x2_t a)
{
  int32x2_t result;
  __asm__ ("fcvtns %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float32 -> unsigned 32-bit, round to nearest, ties to even (FCVTNU).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtn_u32_f32 (float32x2_t a)
{
  uint32x2_t result;
  __asm__ ("fcvtnu %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6415 
6416 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtnd_s64_f64(float64_t a)6417 vcvtnd_s64_f64 (float64_t a)
6418 {
6419   float64_t result;
6420   __asm__ ("fcvtns %d0,%d1"
6421            : "=w"(result)
6422            : "w"(a)
6423            : /* No clobbers */);
6424   return result;
6425 }
6426 
6427 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtnd_u64_f64(float64_t a)6428 vcvtnd_u64_f64 (float64_t a)
6429 {
6430   float64_t result;
6431   __asm__ ("fcvtnu %d0,%d1"
6432            : "=w"(result)
6433            : "w"(a)
6434            : /* No clobbers */);
6435   return result;
6436 }
6437 
/* 4 x float32 -> signed 32-bit, round to nearest, ties to even (FCVTNS).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtnq_s32_f32 (float32x4_t a)
{
  int32x4_t result;
  __asm__ ("fcvtns %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float64 -> signed 64-bit, round to nearest, ties to even (FCVTNS).  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtnq_s64_f64 (float64x2_t a)
{
  int64x2_t result;
  __asm__ ("fcvtns %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 4 x float32 -> unsigned 32-bit, round to nearest, ties to even (FCVTNU).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtnq_u32_f32 (float32x4_t a)
{
  uint32x4_t result;
  __asm__ ("fcvtnu %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float64 -> unsigned 64-bit, round to nearest, ties to even (FCVTNU).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtnq_u64_f64 (float64x2_t a)
{
  uint64x2_t result;
  __asm__ ("fcvtnu %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6481 
6482 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtns_s64_f64(float32_t a)6483 vcvtns_s64_f64 (float32_t a)
6484 {
6485   float32_t result;
6486   __asm__ ("fcvtns %s0,%s1"
6487            : "=w"(result)
6488            : "w"(a)
6489            : /* No clobbers */);
6490   return result;
6491 }
6492 
6493 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtns_u64_f64(float32_t a)6494 vcvtns_u64_f64 (float32_t a)
6495 {
6496   float32_t result;
6497   __asm__ ("fcvtnu %s0,%s1"
6498            : "=w"(result)
6499            : "w"(a)
6500            : /* No clobbers */);
6501   return result;
6502 }
6503 
/* 2 x float32 -> signed 32-bit, round toward plus infinity (FCVTPS).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtp_s32_f32 (float32x2_t a)
{
  int32x2_t result;
  __asm__ ("fcvtps %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float32 -> unsigned 32-bit, round toward plus infinity (FCVTPU).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtp_u32_f32 (float32x2_t a)
{
  uint32x2_t result;
  __asm__ ("fcvtpu %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6525 
6526 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtpd_s64_f64(float64_t a)6527 vcvtpd_s64_f64 (float64_t a)
6528 {
6529   float64_t result;
6530   __asm__ ("fcvtps %d0,%d1"
6531            : "=w"(result)
6532            : "w"(a)
6533            : /* No clobbers */);
6534   return result;
6535 }
6536 
6537 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtpd_u64_f64(float64_t a)6538 vcvtpd_u64_f64 (float64_t a)
6539 {
6540   float64_t result;
6541   __asm__ ("fcvtpu %d0,%d1"
6542            : "=w"(result)
6543            : "w"(a)
6544            : /* No clobbers */);
6545   return result;
6546 }
6547 
/* 4 x float32 -> signed 32-bit, round toward plus infinity (FCVTPS).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtpq_s32_f32 (float32x4_t a)
{
  int32x4_t result;
  __asm__ ("fcvtps %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float64 -> signed 64-bit, round toward plus infinity (FCVTPS).  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtpq_s64_f64 (float64x2_t a)
{
  int64x2_t result;
  __asm__ ("fcvtps %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 4 x float32 -> unsigned 32-bit, round toward plus infinity (FCVTPU).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtpq_u32_f32 (float32x4_t a)
{
  uint32x4_t result;
  __asm__ ("fcvtpu %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float64 -> unsigned 64-bit, round toward plus infinity (FCVTPU).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtpq_u64_f64 (float64x2_t a)
{
  uint64x2_t result;
  __asm__ ("fcvtpu %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6591 
6592 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtps_s64_f64(float32_t a)6593 vcvtps_s64_f64 (float32_t a)
6594 {
6595   float32_t result;
6596   __asm__ ("fcvtps %s0,%s1"
6597            : "=w"(result)
6598            : "w"(a)
6599            : /* No clobbers */);
6600   return result;
6601 }
6602 
6603 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtps_u64_f64(float32_t a)6604 vcvtps_u64_f64 (float32_t a)
6605 {
6606   float32_t result;
6607   __asm__ ("fcvtpu %s0,%s1"
6608            : "=w"(result)
6609            : "w"(a)
6610            : /* No clobbers */);
6611   return result;
6612 }
6613 
/* 4 x signed 32-bit -> 4 x float32 convert (SCVTF).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_s32 (int32x4_t a)
{
  float32x4_t result;
  __asm__ ("scvtf %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 4 x unsigned 32-bit -> 4 x float32 convert (UCVTF).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_u32 (uint32x4_t a)
{
  float32x4_t result;
  __asm__ ("ucvtf %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x signed 64-bit -> 2 x float64 convert (SCVTF).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_s64 (int64x2_t a)
{
  float64x2_t result;
  __asm__ ("scvtf %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x unsigned 64-bit -> 2 x float64 convert (UCVTF).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_u64 (uint64x2_t a)
{
  float64x2_t result;
  __asm__ ("ucvtf %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6657 
/* Fixed-point <-> float conversions with B fractional bits, full-width
   (Q) forms.  B must be a compile-time constant ("i" constraint);
   FCVTZ* rounds toward zero.  Implemented as macros so B can be
   emitted as an immediate.  */

/* 4 x signed 32-bit fixed-point (B fractional bits) -> float32.  */
#define vcvtq_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       float32x4_t result;                                              \
       __asm__ ("scvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 4 x unsigned 32-bit fixed-point (B fractional bits) -> float32.  */
#define vcvtq_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       float32x4_t result;                                              \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 2 x signed 64-bit fixed-point (B fractional bits) -> float64.  */
#define vcvtq_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       float64x2_t result;                                              \
       __asm__ ("scvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 2 x unsigned 64-bit fixed-point (B fractional bits) -> float64.  */
#define vcvtq_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       float64x2_t result;                                              \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 4 x float32 -> signed 32-bit fixed-point with B fractional bits.  */
#define vcvtq_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t a_ = (a);                                            \
       int32x4_t result;                                                \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 2 x float64 -> signed 64-bit fixed-point with B fractional bits.  */
#define vcvtq_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t a_ = (a);                                            \
       int64x2_t result;                                                \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 4 x float32 -> unsigned 32-bit fixed-point with B fractional bits.  */
#define vcvtq_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t a_ = (a);                                            \
       uint32x4_t result;                                               \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* 2 x float64 -> unsigned 64-bit fixed-point with B fractional bits.  */
#define vcvtq_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t a_ = (a);                                            \
       uint64x2_t result;                                               \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6753 
/* 4 x float32 -> signed 32-bit, round toward zero (FCVTZS).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtq_s32_f32 (float32x4_t a)
{
  int32x4_t result;
  __asm__ ("fcvtzs %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float64 -> signed 64-bit, round toward zero (FCVTZS).  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtq_s64_f64 (float64x2_t a)
{
  int64x2_t result;
  __asm__ ("fcvtzs %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 4 x float32 -> unsigned 32-bit, round toward zero (FCVTZU).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtq_u32_f32 (float32x4_t a)
{
  uint32x4_t result;
  __asm__ ("fcvtzu %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* 2 x float64 -> unsigned 64-bit, round toward zero (FCVTZU).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtq_u64_f64 (float64x2_t a)
{
  uint64x2_t result;
  __asm__ ("fcvtzu %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6797 
/* Scalar signed-int -> float conversion (A64 SCVTF, S-register form).
   NOTE(review): the instruction produces a float32 in %s0, yet both the
   return type and `result` are int32_t (and the name says "_f64"); this
   prototype looks inherited and suspect -- verify against the ACLE before
   relying on the returned value's interpretation.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvts_f64_s32 (int32_t a)
{
  int32_t result;
  __asm__ ("scvtf %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6808 
/* Scalar unsigned-int -> float conversion (A64 UCVTF, S-register form).
   NOTE(review): UCVTF yields a float32 in %s0, but return type and
   `result` are uint32_t (and the name says "_f64"); prototype looks
   suspect -- verify against the ACLE.  */
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvts_f64_u32 (uint32_t a)
{
  uint32_t result;
  __asm__ ("ucvtf %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6819 
/* vcvts_n_f32_s32: scalar signed fixed-point (B fractional bits) -> float
   (SCVTF with fbits).  B must be an integer constant expression.
   NOTE(review): `result` is declared int32_t although SCVTF writes a
   float32 into %s0 -- the bit pattern returned is the float's; confirm
   intended typing against the ACLE.  */
#define vcvts_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32_t a_ = (a);                                                \
       int32_t result;                                                  \
       __asm__ ("scvtf %s0,%s1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6831 
/* vcvts_n_f32_u32: scalar unsigned fixed-point (B fractional bits) ->
   float (UCVTF with fbits).  B must be an integer constant expression.
   NOTE(review): `result` is declared uint32_t although UCVTF writes a
   float32 into %s0 -- confirm intended typing against the ACLE.  */
#define vcvts_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32_t a_ = (a);                                               \
       uint32_t result;                                                 \
       __asm__ ("ucvtf %s0,%s1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6843 
/* vcvts_n_s32_f32: scalar float -> signed fixed-point with B fractional
   bits (FCVTZS with fbits, rounds toward zero).  B must be an integer
   constant expression.
   NOTE(review): `result` is declared float32_t although FCVTZS writes an
   int32 into %s0 -- confirm intended typing against the ACLE.  */
#define vcvts_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t a_ = (a);                                              \
       float32_t result;                                                \
       __asm__ ("fcvtzs %s0,%s1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6855 
/* vcvts_n_u32_f32: scalar float -> unsigned fixed-point with B fractional
   bits (FCVTZU with fbits, rounds toward zero).  B must be an integer
   constant expression.
   NOTE(review): `result` is declared float32_t although FCVTZU writes a
   uint32 into %s0 -- confirm intended typing against the ACLE.  */
#define vcvts_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t a_ = (a);                                              \
       float32_t result;                                                \
       __asm__ ("fcvtzu %s0,%s1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6867 
/* Scalar float -> signed-int conversion, round toward zero (A64 FCVTZS,
   S-register form).
   NOTE(review): FCVTZS yields an int32 in %s0, yet the return type and
   `result` are float32_t (and the name says "_s64_f64"); prototype looks
   suspect -- verify against the ACLE.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_s64_f64 (float32_t a)
{
  float32_t result;
  __asm__ ("fcvtzs %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6878 
/* Scalar float -> unsigned-int conversion, round toward zero (A64 FCVTZU,
   S-register form).
   NOTE(review): FCVTZU yields a uint32 in %s0, yet the return type and
   `result` are float32_t (and the name says "_u64_f64"); prototype looks
   suspect -- verify against the ACLE.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_u64_f64 (float32_t a)
{
  float32_t result;
  __asm__ ("fcvtzu %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6889 
/* vcvtx_f32_f64: narrow each float64 lane to float32 using the
   round-to-odd FCVTXN instruction (exactness-preserving narrowing).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvtx_f32_f64 (float64x2_t a)
{
  float32x2_t result;
  __asm__ ("fcvtxn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6900 
/* vcvtx_high_f32_f64: narrow A's float64 lanes into the HIGH half of the
   128-bit result using round-to-odd (FCVTXN2).
   NOTE(review): FCVTXN2 only writes the upper two lanes, but the
   destination is a write-only ("=w") operand with no input tied to it, so
   the low two lanes of the returned vector are whatever was in the
   allocated register -- the usual ACLE form also takes a float32x2_t for
   the low half; confirm before use.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtx_high_f32_f64 (float64x2_t a)
{
  float32x4_t result;
  __asm__ ("fcvtxn2 %0.4s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6911 
/* vcvtxd_f32_f64: scalar float64 -> float32 narrowing with round-to-odd
   (FCVTXN, scalar form).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtxd_f32_f64 (float64_t a)
{
  float32_t result;
  __asm__ ("fcvtxn %s0,%d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6922 
/* vdup_lane_f32: broadcast lane B of A to both lanes of the result (DUP).
   B must be a constant lane index ("i" constraint).  */
#define vdup_lane_f32(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       __asm__ ("dup %0.2s,%1.s[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6934 
/* vdup_lane_p8: broadcast byte lane B of A to all 8 lanes (DUP).
   B must be a constant lane index.  */
#define vdup_lane_p8(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("dup %0.8b,%1.b[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6946 
/* vdup_lane_p16: broadcast halfword lane B of A to all 4 lanes (DUP).
   B must be a constant lane index.  */
#define vdup_lane_p16(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("dup %0.4h,%1.h[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6958 
/* vdup_lane_s8: broadcast byte lane B of A to all 8 lanes (DUP).
   B must be a constant lane index.  */
#define vdup_lane_s8(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t a_ = (a);                                               \
       int8x8_t result;                                                 \
       __asm__ ("dup %0.8b,%1.b[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6970 
/* vdup_lane_s16: broadcast halfword lane B of A to all 4 lanes (DUP).
   B must be a constant lane index.  */
#define vdup_lane_s16(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("dup %0.4h,%1.h[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6982 
/* vdup_lane_s32: broadcast word lane B of A to both lanes (DUP).
   B must be a constant lane index.  */
#define vdup_lane_s32(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("dup %0.2s,%1.s[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6994 
/* vdup_lane_s64: copy doubleword lane B of A into element 0 of the result
   (INS; int64x1_t is a scalar typedef, see top of file).  B must be a
   constant lane index.  */
#define vdup_lane_s64(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t a_ = (a);                                              \
       int64x1_t result;                                                \
       __asm__ ("ins %0.d[0],%1.d[%2]"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7006 
/* vdup_lane_u8: broadcast byte lane B of A to all 8 lanes (DUP).
   B must be a constant lane index.  */
#define vdup_lane_u8(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t a_ = (a);                                              \
       uint8x8_t result;                                                \
       __asm__ ("dup %0.8b,%1.b[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7018 
/* vdup_lane_u16: broadcast halfword lane B of A to all 4 lanes (DUP).
   B must be a constant lane index.  */
#define vdup_lane_u16(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("dup %0.4h,%1.h[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7030 
/* vdup_lane_u32: broadcast word lane B of A to both lanes (DUP).
   B must be a constant lane index.  */
#define vdup_lane_u32(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("dup %0.2s,%1.s[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7042 
/* vdup_lane_u64: copy doubleword lane B of A into element 0 of the result
   (INS; uint64x1_t is a scalar typedef).  B must be a constant lane
   index.  */
#define vdup_lane_u64(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t a_ = (a);                                             \
       uint64x1_t result;                                               \
       __asm__ ("ins %0.d[0],%1.d[%2]"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7054 
/* vdup_n_f32: broadcast scalar A to both float32 lanes (DUP from GPR).
   NOTE(review): A is float32_t but the "r"/%w1 constraint routes it
   through a general register, forcing an FP->GPR move -- confirm this is
   intentional rather than a DUP-from-element form.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_n_f32 (float32_t a)
{
  float32x2_t result;
  __asm__ ("dup %0.2s, %w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7065 
/* vdup_n_p8: broadcast the low byte of A to all 8 lanes (DUP from GPR).  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_n_p8 (uint32_t a)
{
  poly8x8_t result;
  __asm__ ("dup %0.8b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7076 
/* vdup_n_p16: broadcast the low halfword of A to all 4 lanes (DUP from
   GPR).  */
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_n_p16 (uint32_t a)
{
  poly16x4_t result;
  __asm__ ("dup %0.4h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7087 
/* vdup_n_s8: broadcast the low byte of A to all 8 lanes (DUP from GPR).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_n_s8 (int32_t a)
{
  int8x8_t result;
  __asm__ ("dup %0.8b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7098 
/* vdup_n_s16: broadcast the low halfword of A to all 4 lanes (DUP from
   GPR).  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_n_s16 (int32_t a)
{
  int16x4_t result;
  __asm__ ("dup %0.4h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7109 
/* vdup_n_s32: broadcast A to both word lanes (DUP from GPR).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_n_s32 (int32_t a)
{
  int32x2_t result;
  __asm__ ("dup %0.2s,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7120 
/* vdup_n_s64: move A into element 0 of the 64-bit result (INS from GPR;
   int64x1_t is a scalar typedef).  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_n_s64 (int64_t a)
{
  int64x1_t result;
  __asm__ ("ins %0.d[0],%x1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7131 
/* vdup_n_u8: broadcast the low byte of A to all 8 lanes (DUP from GPR).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_n_u8 (uint32_t a)
{
  uint8x8_t result;
  __asm__ ("dup %0.8b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7142 
/* vdup_n_u16: broadcast the low halfword of A to all 4 lanes (DUP from
   GPR).  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_n_u16 (uint32_t a)
{
  uint16x4_t result;
  __asm__ ("dup %0.4h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7153 
/* vdup_n_u32: broadcast A to both word lanes (DUP from GPR).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_n_u32 (uint32_t a)
{
  uint32x2_t result;
  __asm__ ("dup %0.2s,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7164 
/* vdup_n_u64: move A into element 0 of the 64-bit result (INS from GPR;
   uint64x1_t is a scalar typedef).  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_n_u64 (uint64_t a)
{
  uint64x1_t result;
  __asm__ ("ins %0.d[0],%x1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7175 
/* vdupd_lane_f64: extract doubleword lane B of A as a scalar float64
   (DUP to D-register).  B must be a constant lane index.  */
#define vdupd_lane_f64(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t a_ = (a);                                            \
       float64_t result;                                                \
       __asm__ ("dup %d0, %1.d[%2]"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7187 
/* vdupq_lane_f32: broadcast lane B of the 64-bit vector A to all 4 lanes
   of a 128-bit result (DUP).  B must be a constant lane index.  */
#define vdupq_lane_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("dup %0.4s,%1.s[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7199 
/* vdupq_lane_f64: broadcast lane B of A to both doubleword lanes of a
   128-bit result (DUP).  B must be a constant lane index.  */
#define vdupq_lane_f64(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("dup %0.2d,%1.d[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7211 
/* vdupq_lane_p8: broadcast byte lane B of A to all 16 lanes of a 128-bit
   result (DUP).  B must be a constant lane index.  */
#define vdupq_lane_p8(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t a_ = (a);                                              \
       poly8x16_t result;                                               \
       __asm__ ("dup %0.16b,%1.b[%2]"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7223 
/* vdupq_lane_p16: broadcast halfword lane B of A to all 8 lanes of a
   128-bit result (DUP).  B must be a constant lane index.  */
#define vdupq_lane_p16(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("dup %0.8h,%1.h[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7235 
/* vdupq_lane_s8: broadcast byte lane B of A to all 16 lanes of a 128-bit
   result (DUP).  B must be a constant lane index.  */
#define vdupq_lane_s8(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t a_ = (a);                                               \
       int8x16_t result;                                                \
       __asm__ ("dup %0.16b,%1.b[%2]"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7247 
/* vdupq_lane_s16: broadcast halfword lane B of A to all 8 lanes of a
   128-bit result (DUP).  B must be a constant lane index.  */
#define vdupq_lane_s16(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("dup %0.8h,%1.h[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7259 
/* vdupq_lane_s32: broadcast word lane B of A to all 4 lanes of a 128-bit
   result (DUP).  B must be a constant lane index.  */
#define vdupq_lane_s32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("dup %0.4s,%1.s[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7271 
/* vdupq_lane_s64: broadcast doubleword lane B of A to both lanes of a
   128-bit result (DUP).  B must be a constant lane index.  */
#define vdupq_lane_s64(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("dup %0.2d,%1.d[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7283 
/* vdupq_lane_u8: broadcast byte lane B of A to all 16 lanes of a 128-bit
   result (DUP).  B must be a constant lane index.  */
#define vdupq_lane_u8(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result;                                               \
       __asm__ ("dup %0.16b,%1.b[%2]"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7295 
/* vdupq_lane_u16: broadcast halfword lane B of A to all 8 lanes of a
   128-bit result (DUP).  B must be a constant lane index.  */
#define vdupq_lane_u16(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result;                                               \
       __asm__ ("dup %0.8h,%1.h[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7307 
/* vdupq_lane_u32: broadcast word lane B of A to all 4 lanes of a 128-bit
   result (DUP).  B must be a constant lane index.  */
#define vdupq_lane_u32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("dup %0.4s,%1.s[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7319 
/* vdupq_lane_u64: broadcast doubleword lane B of A to both lanes of a
   128-bit result (DUP).  B must be a constant lane index.  */
#define vdupq_lane_u64(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("dup %0.2d,%1.d[%2]"                                    \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7331 
/* vdupq_n_f32: broadcast scalar A to all 4 float32 lanes (DUP from GPR).
   NOTE(review): float passed via "r"/%w1 forces an FP->GPR move; confirm
   this routing is intentional.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_n_f32 (float32_t a)
{
  float32x4_t result;
  __asm__ ("dup %0.4s, %w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7342 
/* vdupq_n_f64: broadcast scalar A to both float64 lanes (DUP from GPR).
   NOTE(review): double passed via "r"/%x1 forces an FP->GPR move; confirm
   this routing is intentional.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_n_f64 (float64_t a)
{
  float64x2_t result;
  __asm__ ("dup %0.2d, %x1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7353 
/* vdupq_n_p8: broadcast the low byte of A to all 16 lanes (DUP from
   GPR).  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_n_p8 (uint32_t a)
{
  poly8x16_t result;
  __asm__ ("dup %0.16b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7364 
/* vdupq_n_p16: broadcast the low halfword of A to all 8 lanes (DUP from
   GPR).  */
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_n_p16 (uint32_t a)
{
  poly16x8_t result;
  __asm__ ("dup %0.8h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7375 
/* vdupq_n_s8: broadcast the low byte of A to all 16 lanes (DUP from
   GPR).  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_n_s8 (int32_t a)
{
  int8x16_t result;
  __asm__ ("dup %0.16b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7386 
/* vdupq_n_s16: broadcast the low halfword of A to all 8 lanes (DUP from
   GPR).  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_n_s16 (int32_t a)
{
  int16x8_t result;
  __asm__ ("dup %0.8h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7397 
/* vdupq_n_s32: broadcast A to all 4 word lanes (DUP from GPR).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_n_s32 (int32_t a)
{
  int32x4_t result;
  __asm__ ("dup %0.4s,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7408 
/* vdupq_n_s64: broadcast A to both doubleword lanes (DUP from GPR).  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_n_s64 (int64_t a)
{
  int64x2_t result;
  __asm__ ("dup %0.2d,%x1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7419 
/* vdupq_n_u8: broadcast the low byte of A to all 16 lanes (DUP from
   GPR).  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_n_u8 (uint32_t a)
{
  uint8x16_t result;
  __asm__ ("dup %0.16b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7430 
/* vdupq_n_u16: broadcast the low halfword of A to all 8 lanes (DUP from
   GPR).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_n_u16 (uint32_t a)
{
  uint16x8_t result;
  __asm__ ("dup %0.8h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
7441 
7442 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_n_u32(uint32_t a)7443 vdupq_n_u32 (uint32_t a)
7444 {
7445   uint32x4_t result;
7446   __asm__ ("dup %0.4s,%w1"
7447            : "=w"(result)
7448            : "r"(a)
7449            : /* No clobbers */);
7450   return result;
7451 }
7452 
7453 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_n_u64(uint64_t a)7454 vdupq_n_u64 (uint64_t a)
7455 {
7456   uint64x2_t result;
7457   __asm__ ("dup %0.2d,%x1"
7458            : "=w"(result)
7459            : "r"(a)
7460            : /* No clobbers */);
7461   return result;
7462 }
7463 
/* Extract the float in lane B of vector A (DUP Sd, Vn.S[lane]).
   NOTE(review): the input is a float32x4_t (Q register); in the current
   ACLE the four-lane-input form is spelled vdups_laneq_f32 — confirm the
   intended naming before relying on this spelling.  */
#define vdups_lane_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t a_ = (a);                                            \
       float32_t result;                                                \
       __asm__ ("dup %s0, %1.s[%2]"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7475 
/* vext_f32 (a, b, c): take two floats from the concatenation A:B,
   starting at lane C (EXT byte immediate is C * 4).  */
#define vext_f32(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t __b = (b);                                           \
       float32x2_t __a = (a);                                           \
       float32x2_t __r;                                                 \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7488 
/* vext_f64 (a, b, c): single-lane EXT on doubles; with one 64-bit lane
   the byte immediate is C * 8.  */
#define vext_f64(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t __b = (b);                                           \
       float64x1_t __a = (a);                                           \
       float64x1_t __r;                                                 \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7501 
/* vext_p8 (a, b, c): take eight bytes from the concatenation A:B,
   starting at byte lane C (EXT immediate is C directly).  */
#define vext_p8(a, b, c)                                                \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t __b = (b);                                             \
       poly8x8_t __a = (a);                                             \
       poly8x8_t __r;                                                   \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7514 
/* vext_p16 (a, b, c): take four halfwords from the concatenation A:B,
   starting at lane C (EXT byte immediate is C * 2).  */
#define vext_p16(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t __b = (b);                                            \
       poly16x4_t __a = (a);                                            \
       poly16x4_t __r;                                                  \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7527 
/* vext_s8 (a, b, c): take eight bytes from the concatenation A:B,
   starting at byte lane C (EXT immediate is C directly).  */
#define vext_s8(a, b, c)                                                \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t __b = (b);                                              \
       int8x8_t __a = (a);                                              \
       int8x8_t __r;                                                    \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7540 
/* vext_s16 (a, b, c): take four halfwords from the concatenation A:B,
   starting at lane C (EXT byte immediate is C * 2).  */
#define vext_s16(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t __b = (b);                                             \
       int16x4_t __a = (a);                                             \
       int16x4_t __r;                                                   \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7553 
/* vext_s32 (a, b, c): take two words from the concatenation A:B,
   starting at lane C (EXT byte immediate is C * 4).  */
#define vext_s32(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t __b = (b);                                             \
       int32x2_t __a = (a);                                             \
       int32x2_t __r;                                                   \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7566 
/* vext_s64 (a, b, c): single-lane EXT on 64-bit elements; the byte
   immediate is C * 8.  */
#define vext_s64(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t __b = (b);                                             \
       int64x1_t __a = (a);                                             \
       int64x1_t __r;                                                   \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7579 
/* vext_u8 (a, b, c): take eight bytes from the concatenation A:B,
   starting at byte lane C (EXT immediate is C directly).  */
#define vext_u8(a, b, c)                                                \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t __b = (b);                                             \
       uint8x8_t __a = (a);                                             \
       uint8x8_t __r;                                                   \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7592 
/* vext_u16 (a, b, c): take four halfwords from the concatenation A:B,
   starting at lane C (EXT byte immediate is C * 2).  */
#define vext_u16(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t __b = (b);                                            \
       uint16x4_t __a = (a);                                            \
       uint16x4_t __r;                                                  \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7605 
/* vext_u32 (a, b, c): take two words from the concatenation A:B,
   starting at lane C (EXT byte immediate is C * 4).  */
#define vext_u32(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t __b = (b);                                            \
       uint32x2_t __a = (a);                                            \
       uint32x2_t __r;                                                  \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7618 
/* vext_u64 (a, b, c): single-lane EXT on 64-bit elements; the byte
   immediate is C * 8.  */
#define vext_u64(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t __b = (b);                                            \
       uint64x1_t __a = (a);                                            \
       uint64x1_t __r;                                                  \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7631 
/* vextq_f32 (a, b, c): 128-bit EXT; take four floats from A:B starting
   at lane C (byte immediate is C * 4).  */
#define vextq_f32(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __b = (b);                                           \
       float32x4_t __a = (a);                                           \
       float32x4_t __r;                                                 \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7644 
/* vextq_f64 (a, b, c): 128-bit EXT; take two doubles from A:B starting
   at lane C (byte immediate is C * 8).  */
#define vextq_f64(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t __b = (b);                                           \
       float64x2_t __a = (a);                                           \
       float64x2_t __r;                                                 \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7657 
/* vextq_p8 (a, b, c): 128-bit EXT; take sixteen bytes from A:B starting
   at byte lane C.  */
#define vextq_p8(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t __b = (b);                                            \
       poly8x16_t __a = (a);                                            \
       poly8x16_t __r;                                                  \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7670 
/* vextq_p16 (a, b, c): 128-bit EXT; take eight halfwords from A:B
   starting at lane C (byte immediate is C * 2).  */
#define vextq_p16(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t __b = (b);                                            \
       poly16x8_t __a = (a);                                            \
       poly16x8_t __r;                                                  \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7683 
/* vextq_s8 (a, b, c): 128-bit EXT; take sixteen bytes from A:B starting
   at byte lane C.  */
#define vextq_s8(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t __b = (b);                                             \
       int8x16_t __a = (a);                                             \
       int8x16_t __r;                                                   \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7696 
/* vextq_s16 (a, b, c): 128-bit EXT; take eight halfwords from A:B
   starting at lane C (byte immediate is C * 2).  */
#define vextq_s16(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __b = (b);                                             \
       int16x8_t __a = (a);                                             \
       int16x8_t __r;                                                   \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7709 
/* vextq_s32 (a, b, c): 128-bit EXT; take four words from A:B starting
   at lane C (byte immediate is C * 4).  */
#define vextq_s32(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __b = (b);                                             \
       int32x4_t __a = (a);                                             \
       int32x4_t __r;                                                   \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7722 
/* vextq_s64 (a, b, c): 128-bit EXT; take two doublewords from A:B
   starting at lane C (byte immediate is C * 8).  */
#define vextq_s64(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t __b = (b);                                             \
       int64x2_t __a = (a);                                             \
       int64x2_t __r;                                                   \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7735 
/* vextq_u8 (a, b, c): 128-bit EXT; take sixteen bytes from A:B starting
   at byte lane C.  */
#define vextq_u8(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t __b = (b);                                            \
       uint8x16_t __a = (a);                                            \
       uint8x16_t __r;                                                  \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7748 
/* vextq_u16 (a, b, c): 128-bit EXT; take eight halfwords from A:B
   starting at lane C (byte immediate is C * 2).  */
#define vextq_u16(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __b = (b);                                            \
       uint16x8_t __a = (a);                                            \
       uint16x8_t __r;                                                  \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7761 
/* vextq_u32 (a, b, c): 128-bit EXT; take four words from A:B starting
   at lane C (byte immediate is C * 4).  */
#define vextq_u32(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __b = (b);                                            \
       uint32x4_t __a = (a);                                            \
       uint32x4_t __r;                                                  \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7774 
/* vextq_u64 (a, b, c): 128-bit EXT; take two doublewords from A:B
   starting at lane C (byte immediate is C * 8).  */
#define vextq_u64(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t __b = (b);                                            \
       uint64x2_t __a = (a);                                            \
       uint64x2_t __r;                                                  \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
                : "=w" (__r)                                            \
                : "w" (__a), "w" (__b), "i" (c)                         \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7787 
7788 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_f32(float32x2_t a,float32x2_t b,float32x2_t c)7789 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
7790 {
7791   float32x2_t result;
7792   __asm__ ("fmla %0.2s,%2.2s,%3.2s"
7793            : "=w"(result)
7794            : "0"(a), "w"(b), "w"(c)
7795            : /* No clobbers */);
7796   return result;
7797 }
7798 
/* vfma_lane_f32 (a, b, c, d): fused multiply-add by lane,
   A + B * C[D], via FMLA Vd.2S, Vn.2S, Vm.S[lane]; the accumulator is
   tied to the output with the "0" constraint.  */
#define vfma_lane_f32(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t __c = (c);                                           \
       float32x2_t __b = (b);                                           \
       float32x2_t __a = (a);                                           \
       float32x2_t __r;                                                 \
       __asm__ ("fmla %0.2s,%2.2s,%3.s[%4]"                             \
                : "=w" (__r)                                            \
                : "0" (__a), "w" (__b), "w" (__c), "i" (d)              \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7812 
/* Scalar fused multiply-add from a vector lane:
   FMLA Dd, Dn, Vm.D[lane] computes Dd = Dd + Dn * Vm[lane].
   NOTE(review): this looks broken.  `result` is declared as a write-only
   output ("=w"), yet FMLA reads %0 as its accumulator, so an
   uninitialized value is folded into the result.  The ACLE form of
   vfmad_lane_f64 also takes a separate addend operand that this macro
   lacks.  Compare vfma_lane_f32 above, which correctly ties the
   accumulator with a "0" constraint.  Confirm intended semantics before
   use (later GCC releases replaced these macros with builtins).  */
#define vfmad_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64_t result;                                                \
       __asm__ ("fmla %d0,%d1,%2.d[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7825 
7826 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_f32(float32x4_t a,float32x4_t b,float32x4_t c)7827 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
7828 {
7829   float32x4_t result;
7830   __asm__ ("fmla %0.4s,%2.4s,%3.4s"
7831            : "=w"(result)
7832            : "0"(a), "w"(b), "w"(c)
7833            : /* No clobbers */);
7834   return result;
7835 }
7836 
7837 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_f64(float64x2_t a,float64x2_t b,float64x2_t c)7838 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
7839 {
7840   float64x2_t result;
7841   __asm__ ("fmla %0.2d,%2.2d,%3.2d"
7842            : "=w"(result)
7843            : "0"(a), "w"(b), "w"(c)
7844            : /* No clobbers */);
7845   return result;
7846 }
7847 
/* vfmaq_lane_f32 (a, b, c, d): 128-bit fused multiply-add by lane,
   A + B * C[D] (FMLA Vd.4S, Vn.4S, Vm.S[lane]); accumulator tied via
   "0".  */
#define vfmaq_lane_f32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __c = (c);                                           \
       float32x4_t __b = (b);                                           \
       float32x4_t __a = (a);                                           \
       float32x4_t __r;                                                 \
       __asm__ ("fmla %0.4s,%2.4s,%3.s[%4]"                             \
                : "=w" (__r)                                            \
                : "0" (__a), "w" (__b), "w" (__c), "i" (d)              \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7861 
/* vfmaq_lane_f64 (a, b, c, d): 128-bit double-precision fused
   multiply-add by lane, A + B * C[D] (FMLA Vd.2D, Vn.2D, Vm.D[lane]);
   accumulator tied via "0".  */
#define vfmaq_lane_f64(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t __c = (c);                                           \
       float64x2_t __b = (b);                                           \
       float64x2_t __a = (a);                                           \
       float64x2_t __r;                                                 \
       __asm__ ("fmla %0.2d,%2.2d,%3.d[%4]"                             \
                : "=w" (__r)                                            \
                : "0" (__a), "w" (__b), "w" (__c), "i" (d)              \
                : /* No clobbers */);                                   \
       __r;                                                             \
     })
7875 
/* Scalar fused multiply-add from a vector lane:
   FMLA Sd, Sn, Vm.S[lane] computes Sd = Sd + Sn * Vm[lane].
   NOTE(review): this looks broken in the same way as vfmad_lane_f64 —
   `result` is write-only ("=w") yet FMLA reads %0 as its accumulator,
   so an uninitialized value enters the result, and the ACLE signature's
   separate addend operand is missing.  Confirm intended semantics
   before use.  */
#define vfmas_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32_t result;                                                \
       __asm__ ("fmla %s0,%s1,%2.s[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7888 
7889 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_n_f32(float32x2_t a,float32x2_t b,float32_t c)7890 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
7891 {
7892   float32x2_t result;
7893   __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
7894            : "=w"(result)
7895            : "0"(a), "w"(b), "w"(c)
7896            : /* No clobbers */);
7897   return result;
7898 }
7899 
7900 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_n_f32(float32x4_t a,float32x4_t b,float32_t c)7901 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7902 {
7903   float32x4_t result;
7904   __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
7905            : "=w"(result)
7906            : "0"(a), "w"(b), "w"(c)
7907            : /* No clobbers */);
7908   return result;
7909 }
7910 
7911 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmaq_n_f64(float64x2_t a,float64x2_t b,float64_t c)7912 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
7913 {
7914   float64x2_t result;
7915   __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
7916            : "=w"(result)
7917            : "0"(a), "w"(b), "w"(c)
7918            : /* No clobbers */);
7919   return result;
7920 }
7921 
7922 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_f32(float32x2_t a,float32x2_t b,float32x2_t c)7923 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
7924 {
7925   float32x2_t result;
7926   __asm__ ("fmls %0.2s,%2.2s,%3.2s"
7927            : "=w"(result)
7928            : "0"(a), "w"(b), "w"(c)
7929            : /* No clobbers */);
7930   return result;
7931 }
7932 
/* Scalar fused multiply-subtract from a vector lane:
   FMLS Dd, Dn, Vm.D[lane] computes Dd = Dd - Dn * Vm[lane].
   NOTE(review): this looks broken in the same way as vfmad_lane_f64 —
   `result` is write-only ("=w") yet FMLS reads %0 as its accumulator,
   so an uninitialized value enters the result, and the ACLE signature's
   separate addend operand is missing.  Confirm intended semantics
   before use.  */
#define vfmsd_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64_t result;                                                \
       __asm__ ("fmls %d0,%d1,%2.d[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7945 
7946 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_f32(float32x4_t a,float32x4_t b,float32x4_t c)7947 vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
7948 {
7949   float32x4_t result;
7950   __asm__ ("fmls %0.4s,%2.4s,%3.4s"
7951            : "=w"(result)
7952            : "0"(a), "w"(b), "w"(c)
7953            : /* No clobbers */);
7954   return result;
7955 }
7956 
7957 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_f64(float64x2_t a,float64x2_t b,float64x2_t c)7958 vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
7959 {
7960   float64x2_t result;
7961   __asm__ ("fmls %0.2d,%2.2d,%3.2d"
7962            : "=w"(result)
7963            : "0"(a), "w"(b), "w"(c)
7964            : /* No clobbers */);
7965   return result;
7966 }
7967 
/* vfmss_lane_f32: scalar fused multiply-subtract against one lane of B.
   Macro form so that C can be passed as an immediate lane index.  */
#define vfmss_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32_t result;                                                \
       __asm__ ("fmls %s0,%s1,%2.s[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7980 
7981 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_high_f32(float32x4_t a)7982 vget_high_f32 (float32x4_t a)
7983 {
7984   float32x2_t result;
7985   __asm__ ("ins %0.d[0], %1.d[1]"
7986            : "=w"(result)
7987            : "w"(a)
7988            : /* No clobbers */);
7989   return result;
7990 }
7991 
7992 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vget_high_f64(float64x2_t a)7993 vget_high_f64 (float64x2_t a)
7994 {
7995   float64x1_t result;
7996   __asm__ ("ins %0.d[0], %1.d[1]"
7997            : "=w"(result)
7998            : "w"(a)
7999            : /* No clobbers */);
8000   return result;
8001 }
8002 
8003 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vget_high_p8(poly8x16_t a)8004 vget_high_p8 (poly8x16_t a)
8005 {
8006   poly8x8_t result;
8007   __asm__ ("ins %0.d[0], %1.d[1]"
8008            : "=w"(result)
8009            : "w"(a)
8010            : /* No clobbers */);
8011   return result;
8012 }
8013 
8014 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vget_high_p16(poly16x8_t a)8015 vget_high_p16 (poly16x8_t a)
8016 {
8017   poly16x4_t result;
8018   __asm__ ("ins %0.d[0], %1.d[1]"
8019            : "=w"(result)
8020            : "w"(a)
8021            : /* No clobbers */);
8022   return result;
8023 }
8024 
8025 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vget_high_s8(int8x16_t a)8026 vget_high_s8 (int8x16_t a)
8027 {
8028   int8x8_t result;
8029   __asm__ ("ins %0.d[0], %1.d[1]"
8030            : "=w"(result)
8031            : "w"(a)
8032            : /* No clobbers */);
8033   return result;
8034 }
8035 
8036 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vget_high_s16(int16x8_t a)8037 vget_high_s16 (int16x8_t a)
8038 {
8039   int16x4_t result;
8040   __asm__ ("ins %0.d[0], %1.d[1]"
8041            : "=w"(result)
8042            : "w"(a)
8043            : /* No clobbers */);
8044   return result;
8045 }
8046 
8047 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vget_high_s32(int32x4_t a)8048 vget_high_s32 (int32x4_t a)
8049 {
8050   int32x2_t result;
8051   __asm__ ("ins %0.d[0], %1.d[1]"
8052            : "=w"(result)
8053            : "w"(a)
8054            : /* No clobbers */);
8055   return result;
8056 }
8057 
8058 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vget_high_s64(int64x2_t a)8059 vget_high_s64 (int64x2_t a)
8060 {
8061   int64x1_t result;
8062   __asm__ ("ins %0.d[0], %1.d[1]"
8063            : "=w"(result)
8064            : "w"(a)
8065            : /* No clobbers */);
8066   return result;
8067 }
8068 
8069 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vget_high_u8(uint8x16_t a)8070 vget_high_u8 (uint8x16_t a)
8071 {
8072   uint8x8_t result;
8073   __asm__ ("ins %0.d[0], %1.d[1]"
8074            : "=w"(result)
8075            : "w"(a)
8076            : /* No clobbers */);
8077   return result;
8078 }
8079 
8080 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vget_high_u16(uint16x8_t a)8081 vget_high_u16 (uint16x8_t a)
8082 {
8083   uint16x4_t result;
8084   __asm__ ("ins %0.d[0], %1.d[1]"
8085            : "=w"(result)
8086            : "w"(a)
8087            : /* No clobbers */);
8088   return result;
8089 }
8090 
8091 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vget_high_u32(uint32x4_t a)8092 vget_high_u32 (uint32x4_t a)
8093 {
8094   uint32x2_t result;
8095   __asm__ ("ins %0.d[0], %1.d[1]"
8096            : "=w"(result)
8097            : "w"(a)
8098            : /* No clobbers */);
8099   return result;
8100 }
8101 
8102 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vget_high_u64(uint64x2_t a)8103 vget_high_u64 (uint64x2_t a)
8104 {
8105   uint64x1_t result;
8106   __asm__ ("ins %0.d[0], %1.d[1]"
8107            : "=w"(result)
8108            : "w"(a)
8109            : /* No clobbers */);
8110   return result;
8111 }
8112 
/* vget_lane_f64: extract lane B of A.  Macro form: B must be an
   immediate lane index.  Note the result is produced in a general
   register ("=r") via UMOV and reinterpreted as float64_t.  */
#define vget_lane_f64(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t a_ = (a);                                            \
       float64_t result;                                                \
       __asm__ ("umov %x0, %1.d[%2]"                                    \
                : "=r"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
8124 
8125 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_low_f32(float32x4_t a)8126 vget_low_f32 (float32x4_t a)
8127 {
8128   float32x2_t result;
8129   __asm__ ("ins %0.d[0], %1.d[0]"
8130            : "=w"(result)
8131            : "w"(a)
8132            : /* No clobbers */);
8133   return result;
8134 }
8135 
8136 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vget_low_f64(float64x2_t a)8137 vget_low_f64 (float64x2_t a)
8138 {
8139   float64x1_t result;
8140   __asm__ ("ins %0.d[0], %1.d[0]"
8141            : "=w"(result)
8142            : "w"(a)
8143            : /* No clobbers */);
8144   return result;
8145 }
8146 
8147 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vget_low_p8(poly8x16_t a)8148 vget_low_p8 (poly8x16_t a)
8149 {
8150   poly8x8_t result;
8151   __asm__ ("ins %0.d[0], %1.d[0]"
8152            : "=w"(result)
8153            : "w"(a)
8154            : /* No clobbers */);
8155   return result;
8156 }
8157 
8158 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vget_low_p16(poly16x8_t a)8159 vget_low_p16 (poly16x8_t a)
8160 {
8161   poly16x4_t result;
8162   __asm__ ("ins %0.d[0], %1.d[0]"
8163            : "=w"(result)
8164            : "w"(a)
8165            : /* No clobbers */);
8166   return result;
8167 }
8168 
8169 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vget_low_s8(int8x16_t a)8170 vget_low_s8 (int8x16_t a)
8171 {
8172   int8x8_t result;
8173   __asm__ ("ins %0.d[0], %1.d[0]"
8174            : "=w"(result)
8175            : "w"(a)
8176            : /* No clobbers */);
8177   return result;
8178 }
8179 
8180 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vget_low_s16(int16x8_t a)8181 vget_low_s16 (int16x8_t a)
8182 {
8183   int16x4_t result;
8184   __asm__ ("ins %0.d[0], %1.d[0]"
8185            : "=w"(result)
8186            : "w"(a)
8187            : /* No clobbers */);
8188   return result;
8189 }
8190 
8191 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vget_low_s32(int32x4_t a)8192 vget_low_s32 (int32x4_t a)
8193 {
8194   int32x2_t result;
8195   __asm__ ("ins %0.d[0], %1.d[0]"
8196            : "=w"(result)
8197            : "w"(a)
8198            : /* No clobbers */);
8199   return result;
8200 }
8201 
8202 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vget_low_s64(int64x2_t a)8203 vget_low_s64 (int64x2_t a)
8204 {
8205   int64x1_t result;
8206   __asm__ ("ins %0.d[0], %1.d[0]"
8207            : "=w"(result)
8208            : "w"(a)
8209            : /* No clobbers */);
8210   return result;
8211 }
8212 
8213 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vget_low_u8(uint8x16_t a)8214 vget_low_u8 (uint8x16_t a)
8215 {
8216   uint8x8_t result;
8217   __asm__ ("ins %0.d[0], %1.d[0]"
8218            : "=w"(result)
8219            : "w"(a)
8220            : /* No clobbers */);
8221   return result;
8222 }
8223 
8224 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vget_low_u16(uint16x8_t a)8225 vget_low_u16 (uint16x8_t a)
8226 {
8227   uint16x4_t result;
8228   __asm__ ("ins %0.d[0], %1.d[0]"
8229            : "=w"(result)
8230            : "w"(a)
8231            : /* No clobbers */);
8232   return result;
8233 }
8234 
8235 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vget_low_u32(uint32x4_t a)8236 vget_low_u32 (uint32x4_t a)
8237 {
8238   uint32x2_t result;
8239   __asm__ ("ins %0.d[0], %1.d[0]"
8240            : "=w"(result)
8241            : "w"(a)
8242            : /* No clobbers */);
8243   return result;
8244 }
8245 
8246 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vget_low_u64(uint64x2_t a)8247 vget_low_u64 (uint64x2_t a)
8248 {
8249   uint64x1_t result;
8250   __asm__ ("ins %0.d[0], %1.d[0]"
8251            : "=w"(result)
8252            : "w"(a)
8253            : /* No clobbers */);
8254   return result;
8255 }
8256 
8257 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vhsub_s8(int8x8_t a,int8x8_t b)8258 vhsub_s8 (int8x8_t a, int8x8_t b)
8259 {
8260   int8x8_t result;
8261   __asm__ ("shsub %0.8b, %1.8b, %2.8b"
8262            : "=w"(result)
8263            : "w"(a), "w"(b)
8264            : /* No clobbers */);
8265   return result;
8266 }
8267 
8268 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vhsub_s16(int16x4_t a,int16x4_t b)8269 vhsub_s16 (int16x4_t a, int16x4_t b)
8270 {
8271   int16x4_t result;
8272   __asm__ ("shsub %0.4h, %1.4h, %2.4h"
8273            : "=w"(result)
8274            : "w"(a), "w"(b)
8275            : /* No clobbers */);
8276   return result;
8277 }
8278 
8279 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vhsub_s32(int32x2_t a,int32x2_t b)8280 vhsub_s32 (int32x2_t a, int32x2_t b)
8281 {
8282   int32x2_t result;
8283   __asm__ ("shsub %0.2s, %1.2s, %2.2s"
8284            : "=w"(result)
8285            : "w"(a), "w"(b)
8286            : /* No clobbers */);
8287   return result;
8288 }
8289 
8290 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vhsub_u8(uint8x8_t a,uint8x8_t b)8291 vhsub_u8 (uint8x8_t a, uint8x8_t b)
8292 {
8293   uint8x8_t result;
8294   __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
8295            : "=w"(result)
8296            : "w"(a), "w"(b)
8297            : /* No clobbers */);
8298   return result;
8299 }
8300 
8301 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vhsub_u16(uint16x4_t a,uint16x4_t b)8302 vhsub_u16 (uint16x4_t a, uint16x4_t b)
8303 {
8304   uint16x4_t result;
8305   __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
8306            : "=w"(result)
8307            : "w"(a), "w"(b)
8308            : /* No clobbers */);
8309   return result;
8310 }
8311 
8312 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vhsub_u32(uint32x2_t a,uint32x2_t b)8313 vhsub_u32 (uint32x2_t a, uint32x2_t b)
8314 {
8315   uint32x2_t result;
8316   __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
8317            : "=w"(result)
8318            : "w"(a), "w"(b)
8319            : /* No clobbers */);
8320   return result;
8321 }
8322 
8323 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vhsubq_s8(int8x16_t a,int8x16_t b)8324 vhsubq_s8 (int8x16_t a, int8x16_t b)
8325 {
8326   int8x16_t result;
8327   __asm__ ("shsub %0.16b, %1.16b, %2.16b"
8328            : "=w"(result)
8329            : "w"(a), "w"(b)
8330            : /* No clobbers */);
8331   return result;
8332 }
8333 
8334 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vhsubq_s16(int16x8_t a,int16x8_t b)8335 vhsubq_s16 (int16x8_t a, int16x8_t b)
8336 {
8337   int16x8_t result;
8338   __asm__ ("shsub %0.8h, %1.8h, %2.8h"
8339            : "=w"(result)
8340            : "w"(a), "w"(b)
8341            : /* No clobbers */);
8342   return result;
8343 }
8344 
8345 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vhsubq_s32(int32x4_t a,int32x4_t b)8346 vhsubq_s32 (int32x4_t a, int32x4_t b)
8347 {
8348   int32x4_t result;
8349   __asm__ ("shsub %0.4s, %1.4s, %2.4s"
8350            : "=w"(result)
8351            : "w"(a), "w"(b)
8352            : /* No clobbers */);
8353   return result;
8354 }
8355 
8356 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vhsubq_u8(uint8x16_t a,uint8x16_t b)8357 vhsubq_u8 (uint8x16_t a, uint8x16_t b)
8358 {
8359   uint8x16_t result;
8360   __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
8361            : "=w"(result)
8362            : "w"(a), "w"(b)
8363            : /* No clobbers */);
8364   return result;
8365 }
8366 
8367 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vhsubq_u16(uint16x8_t a,uint16x8_t b)8368 vhsubq_u16 (uint16x8_t a, uint16x8_t b)
8369 {
8370   uint16x8_t result;
8371   __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
8372            : "=w"(result)
8373            : "w"(a), "w"(b)
8374            : /* No clobbers */);
8375   return result;
8376 }
8377 
8378 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vhsubq_u32(uint32x4_t a,uint32x4_t b)8379 vhsubq_u32 (uint32x4_t a, uint32x4_t b)
8380 {
8381   uint32x4_t result;
8382   __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
8383            : "=w"(result)
8384            : "w"(a), "w"(b)
8385            : /* No clobbers */);
8386   return result;
8387 }
8388 
8389 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_dup_f32(const float32_t * a)8390 vld1_dup_f32 (const float32_t * a)
8391 {
8392   float32x2_t result;
8393   __asm__ ("ld1r {%0.2s}, %1"
8394 	   : "=w"(result)
8395 	   : "Utv"(*a)
8396 	   : /* No clobbers */);
8397   return result;
8398 }
8399 
8400 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_dup_f64(const float64_t * a)8401 vld1_dup_f64 (const float64_t * a)
8402 {
8403   float64x1_t result;
8404   __asm__ ("ld1r {%0.1d}, %1"
8405 	   : "=w"(result)
8406 	   : "Utv"(*a)
8407 	   : /* No clobbers */);
8408   return result;
8409 }
8410 
8411 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_dup_p8(const poly8_t * a)8412 vld1_dup_p8 (const poly8_t * a)
8413 {
8414   poly8x8_t result;
8415   __asm__ ("ld1r {%0.8b}, %1"
8416 	   : "=w"(result)
8417 	   : "Utv"(*a)
8418 	   : /* No clobbers */);
8419   return result;
8420 }
8421 
8422 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_dup_p16(const poly16_t * a)8423 vld1_dup_p16 (const poly16_t * a)
8424 {
8425   poly16x4_t result;
8426   __asm__ ("ld1r {%0.4h}, %1"
8427 	   : "=w"(result)
8428 	   : "Utv"(*a)
8429 	   : /* No clobbers */);
8430   return result;
8431 }
8432 
8433 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_dup_s8(const int8_t * a)8434 vld1_dup_s8 (const int8_t * a)
8435 {
8436   int8x8_t result;
8437   __asm__ ("ld1r {%0.8b}, %1"
8438 	   : "=w"(result)
8439 	   : "Utv"(*a)
8440 	   : /* No clobbers */);
8441   return result;
8442 }
8443 
8444 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_dup_s16(const int16_t * a)8445 vld1_dup_s16 (const int16_t * a)
8446 {
8447   int16x4_t result;
8448   __asm__ ("ld1r {%0.4h}, %1"
8449 	   : "=w"(result)
8450 	   : "Utv"(*a)
8451 	   : /* No clobbers */);
8452   return result;
8453 }
8454 
8455 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_dup_s32(const int32_t * a)8456 vld1_dup_s32 (const int32_t * a)
8457 {
8458   int32x2_t result;
8459   __asm__ ("ld1r {%0.2s}, %1"
8460 	   : "=w"(result)
8461 	   : "Utv"(*a)
8462 	   : /* No clobbers */);
8463   return result;
8464 }
8465 
8466 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_dup_s64(const int64_t * a)8467 vld1_dup_s64 (const int64_t * a)
8468 {
8469   int64x1_t result;
8470   __asm__ ("ld1r {%0.1d}, %1"
8471 	   : "=w"(result)
8472 	   : "Utv"(*a)
8473 	   : /* No clobbers */);
8474   return result;
8475 }
8476 
8477 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_dup_u8(const uint8_t * a)8478 vld1_dup_u8 (const uint8_t * a)
8479 {
8480   uint8x8_t result;
8481   __asm__ ("ld1r {%0.8b}, %1"
8482 	   : "=w"(result)
8483 	   : "Utv"(*a)
8484 	   : /* No clobbers */);
8485   return result;
8486 }
8487 
8488 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_dup_u16(const uint16_t * a)8489 vld1_dup_u16 (const uint16_t * a)
8490 {
8491   uint16x4_t result;
8492   __asm__ ("ld1r {%0.4h}, %1"
8493 	   : "=w"(result)
8494 	   : "Utv"(*a)
8495 	   : /* No clobbers */);
8496   return result;
8497 }
8498 
8499 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_dup_u32(const uint32_t * a)8500 vld1_dup_u32 (const uint32_t * a)
8501 {
8502   uint32x2_t result;
8503   __asm__ ("ld1r {%0.2s}, %1"
8504 	   : "=w"(result)
8505 	   : "Utv"(*a)
8506 	   : /* No clobbers */);
8507   return result;
8508 }
8509 
8510 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_dup_u64(const uint64_t * a)8511 vld1_dup_u64 (const uint64_t * a)
8512 {
8513   uint64x1_t result;
8514   __asm__ ("ld1r {%0.1d}, %1"
8515 	   : "=w"(result)
8516 	   : "Utv"(*a)
8517 	   : /* No clobbers */);
8518   return result;
8519 }
8520 
8521 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32(const float32_t * a)8522 vld1_f32 (const float32_t * a)
8523 {
8524   float32x2_t result;
8525   __asm__ ("ld1 {%0.2s}, %1"
8526 	   : "=w"(result)
8527 	   : "Utv"(({const float32x2_t *_a = (float32x2_t *) a; *_a;}))
8528 	   : /* No clobbers */);
8529   return result;
8530 }
8531 
8532 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_f64(const float64_t * a)8533 vld1_f64 (const float64_t * a)
8534 {
8535   float64x1_t result;
8536   __asm__ ("ld1 {%0.1d}, %1"
8537 	   : "=w"(result)
8538 	   : "Utv"(*a)
8539 	   : /* No clobbers */);
8540   return result;
8541 }
8542 
/* vld1_lane_* family: load one element from *A into lane C of vector B,
   leaving the other lanes unchanged (B is tied to the output via "0").
   Macro form so that C can be an immediate lane index.  */

#define vld1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x2_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x1_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       const poly8_t * a_ = (a);                                        \
       poly8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       const int8_t * a_ = (a);                                         \
       int8x8_t result;                                                 \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x4_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x2_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x1_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       const uint8_t * a_ = (a);                                        \
       uint8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x2_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x1_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
8698 
8699 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_p8(const poly8_t * a)8700 vld1_p8 (const poly8_t * a)
8701 {
8702   poly8x8_t result;
8703   __asm__ ("ld1 {%0.8b}, %1"
8704 	   : "=w"(result)
8705 	   : "Utv"(({const poly8x8_t *_a = (poly8x8_t *) a; *_a;}))
8706 	   : /* No clobbers */);
8707   return result;
8708 }
8709 
/* vld1_<type>: load one full 64-bit (D-register) vector from the memory
   pointed to by A using the LD1 instruction.  The statement expression
   casts A to a pointer to the whole vector type so the "Utv" memory
   operand (an address valid for SIMD structure loads) covers all 8
   bytes in a single operand.  */

/* Load four 16-bit polynomial elements.  */
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_p16 (const poly16_t * a)
{
  poly16x4_t result;
  __asm__ ("ld1 {%0.4h}, %1"
	   : "=w"(result)
	   : "Utv"(({const poly16x4_t *_a = (poly16x4_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load eight signed bytes.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_s8 (const int8_t * a)
{
  int8x8_t result;
  __asm__ ("ld1 {%0.8b}, %1"
	   : "=w"(result)
	   : "Utv"(({const int8x8_t *_a = (int8x8_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load four signed halfwords.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_s16 (const int16_t * a)
{
  int16x4_t result;
  __asm__ ("ld1 {%0.4h}, %1"
	   : "=w"(result)
	   : "Utv"(({const int16x4_t *_a = (int16x4_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load two signed words.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_s32 (const int32_t * a)
{
  int32x2_t result;
  __asm__ ("ld1 {%0.2s}, %1"
	   : "=w"(result)
	   : "Utv"(({const int32x2_t *_a = (int32x2_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load one signed doubleword.  int64x1_t is a plain 64-bit scalar
   (see the typedefs at the top of this file), so *a is used directly
   with no vector-pointer cast.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_s64 (const int64_t * a)
{
  int64x1_t result;
  __asm__ ("ld1 {%0.1d}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

/* Load eight unsigned bytes.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_u8 (const uint8_t * a)
{
  uint8x8_t result;
  __asm__ ("ld1 {%0.8b}, %1"
	   : "=w"(result)
	   : "Utv"(({const uint8x8_t *_a = (uint8x8_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load four unsigned halfwords.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_u16 (const uint16_t * a)
{
  uint16x4_t result;
  __asm__ ("ld1 {%0.4h}, %1"
	   : "=w"(result)
	   : "Utv"(({const uint16x4_t *_a = (uint16x4_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load two unsigned words.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_u32 (const uint32_t * a)
{
  uint32x2_t result;
  __asm__ ("ld1 {%0.2s}, %1"
	   : "=w"(result)
	   : "Utv"(({const uint32x2_t *_a = (uint32x2_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load one unsigned doubleword (uint64x1_t is a 64-bit scalar).  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_u64 (const uint64_t * a)
{
  uint64x1_t result;
  __asm__ ("ld1 {%0.1d}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}
8808 
/* vld1q_dup_<type>: load a single scalar element from A and replicate
   it into every lane of a 128-bit (Q-register) vector using the LD1R
   (load-and-replicate) instruction.  Only one element is read from
   memory, so "Utv"(*a) passes the scalar itself as the memory
   operand.  */

/* Replicate one float across four 32-bit lanes.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t * a)
{
  float32x4_t result;
  __asm__ ("ld1r {%0.4s}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

/* Replicate one double across two 64-bit lanes.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_dup_f64 (const float64_t * a)
{
  float64x2_t result;
  __asm__ ("ld1r {%0.2d}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

/* Replicate one poly8 across sixteen byte lanes.  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_dup_p8 (const poly8_t * a)
{
  poly8x16_t result;
  __asm__ ("ld1r {%0.16b}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

/* Replicate one poly16 across eight halfword lanes.  */
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_dup_p16 (const poly16_t * a)
{
  poly16x8_t result;
  __asm__ ("ld1r {%0.8h}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

/* Replicate one signed byte across sixteen lanes.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_dup_s8 (const int8_t * a)
{
  int8x16_t result;
  __asm__ ("ld1r {%0.16b}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

/* Replicate one signed halfword across eight lanes.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_dup_s16 (const int16_t * a)
{
  int16x8_t result;
  __asm__ ("ld1r {%0.8h}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

/* Replicate one signed word across four lanes.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_dup_s32 (const int32_t * a)
{
  int32x4_t result;
  __asm__ ("ld1r {%0.4s}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

/* Replicate one signed doubleword across two lanes.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_dup_s64 (const int64_t * a)
{
  int64x2_t result;
  __asm__ ("ld1r {%0.2d}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

/* Replicate one unsigned byte across sixteen lanes.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_dup_u8 (const uint8_t * a)
{
  uint8x16_t result;
  __asm__ ("ld1r {%0.16b}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

/* Replicate one unsigned halfword across eight lanes.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_dup_u16 (const uint16_t * a)
{
  uint16x8_t result;
  __asm__ ("ld1r {%0.8h}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

/* Replicate one unsigned word across four lanes.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_dup_u32 (const uint32_t * a)
{
  uint32x4_t result;
  __asm__ ("ld1r {%0.4s}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}

/* Replicate one unsigned doubleword across two lanes.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_dup_u64 (const uint64_t * a)
{
  uint64x2_t result;
  __asm__ ("ld1r {%0.2d}, %1"
	   : "=w"(result)
	   : "Utv"(*a)
	   : /* No clobbers */);
  return result;
}
8940 
/* vld1q_f32: load a full 128-bit vector of four floats from A.  The
   cast-to-vector-pointer idiom makes "Utv" cover all 16 bytes as one
   memory operand.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t * a)
{
  float32x4_t result;
  __asm__ ("ld1 {%0.4s}, %1"
	   : "=w"(result)
	   : "Utv"(({const float32x4_t *_a = (float32x4_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* vld1q_f64: load a full 128-bit vector of two doubles from A.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_f64 (const float64_t * a)
{
  float64x2_t result;
  __asm__ ("ld1 {%0.2d}, %1"
	   : "=w"(result)
	   : "Utv"(({const float64x2_t *_a = (float64x2_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}
8962 
/* vld1q_lane_<type>(a, b, c): return vector B with lane C replaced by
   the scalar loaded from address A; all other lanes keep their values
   from B.  The "0"(b_) constraint ties B to the output register so the
   single-lane LD1 merges into it, and "i"(c) requires the lane index
   to be a compile-time constant (hence these are macros, not
   functions).  Comments are placed between the macros so the
   continuation lines stay untouched.  */

#define vld1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x4_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* Insert one double into lane C of a float64x2_t.  */
#define vld1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x2_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* Insert one poly8 into lane C of a poly8x16_t.  */
#define vld1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       const poly8_t * a_ = (a);                                        \
       poly8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* Insert one poly16 into lane C of a poly16x8_t.  */
#define vld1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* Insert one signed byte into lane C of an int8x16_t.  */
#define vld1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       const int8_t * a_ = (a);                                         \
       int8x16_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* Insert one signed halfword into lane C of an int16x8_t.  */
#define vld1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x8_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* Insert one signed word into lane C of an int32x4_t.  */
#define vld1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x4_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* Insert one signed doubleword into lane C of an int64x2_t.  */
#define vld1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x2_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* Insert one unsigned byte into lane C of a uint8x16_t.  */
#define vld1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       const uint8_t * a_ = (a);                                        \
       uint8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* Insert one unsigned halfword into lane C of a uint16x8_t.  */
#define vld1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* Insert one unsigned word into lane C of a uint32x4_t.  */
#define vld1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x4_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* Insert one unsigned doubleword into lane C of a uint64x2_t.  */
#define vld1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x2_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9118 
/* vld1q_p8: load sixteen 8-bit polynomial elements (a full Q register)
   from A; .16b is the correct arrangement for byte elements.  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_p8 (const poly8_t * a)
{
  poly8x16_t result;
  __asm__ ("ld1 {%0.16b}, %1"
	   : "=w"(result)
	   : "Utv"(({const poly8x16_t *_a = (poly8x16_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}
9129 
9130 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_p16(const poly16_t * a)9131 vld1q_p16 (const poly16_t * a)
9132 {
9133   poly16x8_t result;
9134   __asm__ ("ld1 {%0.16b}, %1"
9135 	   : "=w"(result)
9136 	   : "Utv"(({const poly16x8_t *_a = (poly16x8_t *) a; *_a;}))
9137 	   : /* No clobbers */);
9138   return result;
9139 }
9140 
/* vld1q_<type>: load a full 128-bit (Q-register) vector from A with
   LD1, using the element arrangement that matches the element size so
   lane order is correct regardless of endianness.  */

/* Load sixteen signed bytes.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_s8 (const int8_t * a)
{
  int8x16_t result;
  __asm__ ("ld1 {%0.16b}, %1"
	   : "=w"(result)
	   : "Utv"(({const int8x16_t *_a = (int8x16_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load eight signed halfwords.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_s16 (const int16_t * a)
{
  int16x8_t result;
  __asm__ ("ld1 {%0.8h}, %1"
	   : "=w"(result)
	   : "Utv"(({const int16x8_t *_a = (int16x8_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load four signed words.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_s32 (const int32_t * a)
{
  int32x4_t result;
  __asm__ ("ld1 {%0.4s}, %1"
	   : "=w"(result)
	   : "Utv"(({const int32x4_t *_a = (int32x4_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load two signed doublewords.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_s64 (const int64_t * a)
{
  int64x2_t result;
  __asm__ ("ld1 {%0.2d}, %1"
	   : "=w"(result)
	   : "Utv"(({const int64x2_t *_a = (int64x2_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load sixteen unsigned bytes.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_u8 (const uint8_t * a)
{
  uint8x16_t result;
  __asm__ ("ld1 {%0.16b}, %1"
	   : "=w"(result)
	   : "Utv"(({const uint8x16_t *_a = (uint8x16_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load eight unsigned halfwords.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_u16 (const uint16_t * a)
{
  uint16x8_t result;
  __asm__ ("ld1 {%0.8h}, %1"
	   : "=w"(result)
	   : "Utv"(({const uint16x8_t *_a = (uint16x8_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load four unsigned words.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_u32 (const uint32_t * a)
{
  uint32x4_t result;
  __asm__ ("ld1 {%0.4s}, %1"
	   : "=w"(result)
	   : "Utv"(({const uint32x4_t *_a = (uint32x4_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}

/* Load two unsigned doublewords.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_u64 (const uint64_t * a)
{
  uint64x2_t result;
  __asm__ ("ld1 {%0.2d}, %1"
	   : "=w"(result)
	   : "Utv"(({const uint64x2_t *_a = (uint64x2_t *) a; *_a;}))
	   : /* No clobbers */);
  return result;
}
9228 
/* vmaxnm[q]_<type>: element-wise maximum using FMAXNM, the "maxNum"
   variant of max (per the A64 FMAXNM instruction: when exactly one
   operand of a pair is a quiet NaN, the other operand is returned).  */

/* Two-lane single-precision maxNum.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmaxnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxnm %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Four-lane single-precision maxNum.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmaxnm %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Two-lane double-precision maxNum.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmaxnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmaxnm %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
9261 
/* Across-lanes maximum reductions: each function reduces all lanes of
   the input vector to a single scalar maximum ([S|U]MAXV for integers,
   FMAXV / FMAXNMV for floats).  The scalar result lands in a SIMD
   register ("=w") and is returned by value.  */

/* Four-lane float maxNum reduction (FMAXNMV).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmvq_f32 (float32x4_t a)
{
  float32_t result;
  __asm__ ("fmaxnmv %s0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Signed max of eight bytes.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxv_s8 (int8x8_t a)
{
  int8_t result;
  __asm__ ("smaxv %b0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Signed max of four halfwords.  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxv_s16 (int16x4_t a)
{
  int16_t result;
  __asm__ ("smaxv %h0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Unsigned max of eight bytes.  */
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxv_u8 (uint8x8_t a)
{
  uint8_t result;
  __asm__ ("umaxv %b0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Unsigned max of four halfwords.  */
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxv_u16 (uint16x4_t a)
{
  uint16_t result;
  __asm__ ("umaxv %h0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Float max of four lanes (FMAXV).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxvq_f32 (float32x4_t a)
{
  float32_t result;
  __asm__ ("fmaxv %s0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Signed max of sixteen bytes.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxvq_s8 (int8x16_t a)
{
  int8_t result;
  __asm__ ("smaxv %b0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Signed max of eight halfwords.  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxvq_s16 (int16x8_t a)
{
  int16_t result;
  __asm__ ("smaxv %h0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Signed max of four words.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxvq_s32 (int32x4_t a)
{
  int32_t result;
  __asm__ ("smaxv %s0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Unsigned max of sixteen bytes.  */
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxvq_u8 (uint8x16_t a)
{
  uint8_t result;
  __asm__ ("umaxv %b0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Unsigned max of eight halfwords.  */
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxvq_u16 (uint16x8_t a)
{
  uint16_t result;
  __asm__ ("umaxv %h0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Unsigned max of four words.  */
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxvq_u32 (uint32x4_t a)
{
  uint32_t result;
  __asm__ ("umaxv %s0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
9393 
/* Across-lanes minimum reductions: the minimum-valued lane of the
   input vector is returned as a scalar ([S|U]MINV for integers,
   FMINV / FMINNMV for floats).  */

/* Four-lane float minNum reduction (FMINNMV).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminnmvq_f32 (float32x4_t a)
{
  float32_t result;
  __asm__ ("fminnmv %s0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Signed min of eight bytes.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vminv_s8 (int8x8_t a)
{
  int8_t result;
  __asm__ ("sminv %b0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Signed min of four halfwords.  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vminv_s16 (int16x4_t a)
{
  int16_t result;
  __asm__ ("sminv %h0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Unsigned min of eight bytes.  */
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vminv_u8 (uint8x8_t a)
{
  uint8_t result;
  __asm__ ("uminv %b0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Unsigned min of four halfwords.  */
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vminv_u16 (uint16x4_t a)
{
  uint16_t result;
  __asm__ ("uminv %h0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Float min of four lanes (FMINV).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminvq_f32 (float32x4_t a)
{
  float32_t result;
  __asm__ ("fminv %s0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Signed min of sixteen bytes.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vminvq_s8 (int8x16_t a)
{
  int8_t result;
  __asm__ ("sminv %b0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Signed min of eight halfwords.  */
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vminvq_s16 (int16x8_t a)
{
  int16_t result;
  __asm__ ("sminv %h0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Signed min of four words.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vminvq_s32 (int32x4_t a)
{
  int32_t result;
  __asm__ ("sminv %s0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Unsigned min of sixteen bytes.  */
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vminvq_u8 (uint8x16_t a)
{
  uint8_t result;
  __asm__ ("uminv %b0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Unsigned min of eight halfwords.  */
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vminvq_u16 (uint16x8_t a)
{
  uint16_t result;
  __asm__ ("uminv %h0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Unsigned min of four words.  */
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vminvq_u32 (uint32x4_t a)
{
  uint32_t result;
  __asm__ ("uminv %s0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
9525 
9526 #define vmla_lane_f32(a, b, c, d)                                       \
9527   __extension__                                                         \
9528     ({                                                                  \
9529        float32x2_t c_ = (c);                                            \
9530        float32x2_t b_ = (b);                                            \
9531        float32x2_t a_ = (a);                                            \
9532        float32x2_t result;                                              \
9533        float32x2_t t1;                                                  \
9534        __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fadd %0.2s, %0.2s, %1.2s" \
9535                 : "=w"(result), "=w"(t1)                                \
9536                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
9537                 : /* No clobbers */);                                   \
9538        result;                                                          \
9539      })
9540 
9541 #define vmla_lane_s16(a, b, c, d)                                       \
9542   __extension__                                                         \
9543     ({                                                                  \
9544        int16x4_t c_ = (c);                                              \
9545        int16x4_t b_ = (b);                                              \
9546        int16x4_t a_ = (a);                                              \
9547        int16x4_t result;                                                \
9548        __asm__ ("mla %0.4h, %2.4h, %3.h[%4]"                            \
9549                 : "=w"(result)                                          \
9550                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
9551                 : /* No clobbers */);                                   \
9552        result;                                                          \
9553      })
9554 
/* vmla_lane_s32: element-wise a + b * c[d] on 2x int32; d must be a
   constant lane index ("i" constraint).  Input a is tied to the output
   register via "0" because MLA accumulates into its destination.  */
#define vmla_lane_s32(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9568 
/* vmla_lane_u16: element-wise a + b * c[d] on 4x uint16; d must be a
   constant lane index.
   Fix: 16-bit by-element MLA restricts the index register to V0-V15,
   so the lane vector uses the "x" constraint instead of "w".  */
#define vmla_lane_u16(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9582 
/* vmla_lane_u32: element-wise a + b * c[d] on 2x uint32; d must be a
   constant lane index.  "0" ties the accumulator input a to the
   output register.  */
#define vmla_lane_u32(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9596 
/* vmla_laneq_s16: element-wise a + b * c[d] on 4x int16, with the lane
   taken from a 128-bit (8-lane) vector c; d must be constant.
   Fix: 16-bit by-element MLA restricts the index register to V0-V15,
   so the lane vector uses the "x" constraint instead of "w".  */
#define vmla_laneq_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9610 
/* vmla_laneq_s32: element-wise a + b * c[d] on 2x int32, with the lane
   taken from a 128-bit (4-lane) vector c; d must be constant.  */
#define vmla_laneq_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9624 
/* vmla_laneq_u16: element-wise a + b * c[d] on 4x uint16, lane taken
   from a 128-bit (8-lane) vector c; d must be constant.
   Fix: 16-bit by-element MLA restricts the index register to V0-V15,
   so the lane vector uses the "x" constraint instead of "w".  */
#define vmla_laneq_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9638 
/* vmla_laneq_u32: element-wise a + b * c[d] on 2x uint32, lane taken
   from a 128-bit (4-lane) vector c; d must be constant.  */
#define vmla_laneq_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x2_t result;                                                \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9652 
/* vmla_n_f32: element-wise a + b * c on 2x float32, with c a scalar
   broadcast via the .s[0] by-element form.  Implemented as a separate
   FMUL into the scratch register t1 followed by FADD (rather than
   FMLA), so each element performs two distinct roundings.  "0" ties
   input a to output operand 0, which FADD reads as an addend.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
{
  float32x2_t result;
  float32x2_t t1;  /* scratch for the intermediate product b * c  */
  __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9664 
9665 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_n_s16(int16x4_t a,int16x4_t b,int16_t c)9666 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
9667 {
9668   int16x4_t result;
9669   __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
9670            : "=w"(result)
9671            : "0"(a), "w"(b), "w"(c)
9672            : /* No clobbers */);
9673   return result;
9674 }
9675 
/* vmla_n_s32: element-wise a + b * c on 2x int32, with the scalar c
   broadcast via the .s[0] by-element form.  "0" ties accumulator a to
   the output register.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
{
  int32x2_t result;
  __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9686 
9687 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_n_u16(uint16x4_t a,uint16x4_t b,uint16_t c)9688 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
9689 {
9690   uint16x4_t result;
9691   __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
9692            : "=w"(result)
9693            : "0"(a), "w"(b), "w"(c)
9694            : /* No clobbers */);
9695   return result;
9696 }
9697 
/* vmla_n_u32: element-wise a + b * c on 2x uint32, with the scalar c
   broadcast via the .s[0] by-element form.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
{
  uint32x2_t result;
  __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9708 
/* vmla_s8: element-wise a + b * c on 8x int8 (vector-by-vector MLA).
   MLA accumulates into its destination, hence input a is tied to the
   output register with "0".  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("mla %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9719 
/* vmla_s16: element-wise a + b * c on 4x int16 (vector-by-vector MLA;
   no lane restriction applies to the full-vector form).  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("mla %0.4h, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9730 
/* vmla_s32: element-wise a + b * c on 2x int32 (vector-by-vector MLA).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
{
  int32x2_t result;
  __asm__ ("mla %0.2s, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9741 
/* vmla_u8: element-wise a + b * c on 8x uint8 (vector-by-vector MLA;
   the instruction is sign-agnostic, same opcode as vmla_s8).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint8x8_t result;
  __asm__ ("mla %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9752 
/* vmla_u16: element-wise a + b * c on 4x uint16 (vector-by-vector MLA).  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint16x4_t result;
  __asm__ ("mla %0.4h, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9763 
/* vmla_u32: element-wise a + b * c on 2x uint32 (vector-by-vector MLA).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint32x2_t result;
  __asm__ ("mla %0.2s, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9774 
/* vmlal_high_lane_s16: widening multiply-accumulate of the high halves:
   a(4x int32) += high-half(b)(4x int16) * c[d], using SMLAL2; d must
   be a constant lane index.
   Fix: 16-bit by-element SMLAL2 restricts the index register to
   V0-V15, so the lane vector uses the "x" constraint instead of "w".  */
#define vmlal_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9788 
/* vmlal_high_lane_s32: widening multiply-accumulate of the high halves:
   a(2x int64) += high-half(b)(2x int32) * c[d], using SMLAL2; d must
   be a constant lane index.  */
#define vmlal_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9802 
/* vmlal_high_lane_u16: widening multiply-accumulate of the high halves:
   a(4x uint32) += high-half(b)(4x uint16) * c[d], using UMLAL2; d must
   be a constant lane index.
   Fix: 16-bit by-element UMLAL2 restricts the index register to
   V0-V15, so the lane vector uses the "x" constraint instead of "w".  */
#define vmlal_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9816 
/* vmlal_high_lane_u32: widening multiply-accumulate of the high halves:
   a(2x uint64) += high-half(b)(2x uint32) * c[d], using UMLAL2; d must
   be a constant lane index.  */
#define vmlal_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9830 
/* vmlal_high_laneq_s16: like vmlal_high_lane_s16 but the lane comes
   from a 128-bit (8-lane) vector c; d must be constant.
   Fix: 16-bit by-element SMLAL2 restricts the index register to
   V0-V15, so the lane vector uses the "x" constraint instead of "w".  */
#define vmlal_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9844 
/* vmlal_high_laneq_s32: like vmlal_high_lane_s32 but the lane comes
   from a 128-bit (4-lane) vector c; d must be constant.  */
#define vmlal_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9858 
/* vmlal_high_laneq_u16: like vmlal_high_lane_u16 but the lane comes
   from a 128-bit (8-lane) vector c; d must be constant.
   Fix: 16-bit by-element UMLAL2 restricts the index register to
   V0-V15, so the lane vector uses the "x" constraint instead of "w".  */
#define vmlal_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9872 
/* vmlal_high_laneq_u32: like vmlal_high_lane_u32 but the lane comes
   from a 128-bit (4-lane) vector c; d must be constant.  */
#define vmlal_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9886 
9887 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_high_n_s16(int32x4_t a,int16x8_t b,int16_t c)9888 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
9889 {
9890   int32x4_t result;
9891   __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
9892            : "=w"(result)
9893            : "0"(a), "w"(b), "w"(c)
9894            : /* No clobbers */);
9895   return result;
9896 }
9897 
/* vmlal_high_n_s32: a(2x int64) += high-half(b)(2x int32) * c, with the
   scalar c broadcast via the .s[0] by-element SMLAL2 form.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9908 
9909 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_high_n_u16(uint32x4_t a,uint16x8_t b,uint16_t c)9910 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
9911 {
9912   uint32x4_t result;
9913   __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
9914            : "=w"(result)
9915            : "0"(a), "w"(b), "w"(c)
9916            : /* No clobbers */);
9917   return result;
9918 }
9919 
/* vmlal_high_n_u32: a(2x uint64) += high-half(b)(2x uint32) * c, with
   the scalar c broadcast via the .s[0] by-element UMLAL2 form.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9930 
/* vmlal_high_s8: widening multiply-accumulate of the high halves:
   a(8x int16) += high-half(b) * high-half(c), 8x int8 each, via SMLAL2.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
{
  int16x8_t result;
  __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9941 
/* vmlal_high_s16: widening multiply-accumulate of the high halves:
   a(4x int32) += high-half(b) * high-half(c), 4x int16 each, via SMLAL2.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
{
  int32x4_t result;
  __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9952 
/* vmlal_high_s32: widening multiply-accumulate of the high halves:
   a(2x int64) += high-half(b) * high-half(c), 2x int32 each, via SMLAL2.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
{
  int64x2_t result;
  __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9963 
/* vmlal_high_u8: unsigned widening multiply-accumulate of the high
   halves: a(8x uint16) += high-half(b) * high-half(c) via UMLAL2.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
{
  uint16x8_t result;
  __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9974 
/* vmlal_high_u16: unsigned widening multiply-accumulate of the high
   halves: a(4x uint32) += high-half(b) * high-half(c) via UMLAL2.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
{
  uint32x4_t result;
  __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9985 
/* vmlal_high_u32: unsigned widening multiply-accumulate of the high
   halves: a(2x uint64) += high-half(b) * high-half(c) via UMLAL2.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
{
  uint64x2_t result;
  __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
9996 
/* vmlal_lane_s16: widening multiply-accumulate by lane:
   a(4x int32) += b(4x int16) * c[d], using SMLAL; d must be a constant
   lane index.
   Fix: 16-bit by-element SMLAL restricts the index register to V0-V15,
   so the lane vector uses the "x" constraint instead of "w".  */
#define vmlal_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10010 
/* vmlal_lane_s32: widening multiply-accumulate by lane:
   a(2x int64) += b(2x int32) * c[d], using SMLAL; d must be a constant
   lane index.  */
#define vmlal_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10024 
/* vmlal_lane_u16: widening multiply-accumulate by lane:
   a(4x uint32) += b(4x uint16) * c[d], using UMLAL; d must be a
   constant lane index.
   Fix: 16-bit by-element UMLAL restricts the index register to V0-V15,
   so the lane vector uses the "x" constraint instead of "w".  */
#define vmlal_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10038 
/* vmlal_lane_u32: widening multiply-accumulate by lane:
   a(2x uint64) += b(2x uint32) * c[d], using UMLAL; d must be a
   constant lane index.  */
#define vmlal_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10052 
/* vmlal_laneq_s16: like vmlal_lane_s16 but the lane comes from a
   128-bit (8-lane) vector c; d must be constant.
   Fix: 16-bit by-element SMLAL restricts the index register to V0-V15,
   so the lane vector uses the "x" constraint instead of "w".  */
#define vmlal_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10066 
/* vmlal_laneq_s32: like vmlal_lane_s32 but the lane comes from a
   128-bit (4-lane) vector c; d must be constant.  */
#define vmlal_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10080 
/* vmlal_laneq_u16: like vmlal_lane_u16 but the lane comes from a
   128-bit (8-lane) vector c; d must be constant.
   Fix: 16-bit by-element UMLAL restricts the index register to V0-V15,
   so the lane vector uses the "x" constraint instead of "w".  */
#define vmlal_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10094 
/* vmlal_laneq_u32: like vmlal_lane_u32 but the lane comes from a
   128-bit (4-lane) vector c; d must be constant.  */
#define vmlal_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                              \
       uint32x2_t b_ = (b);                                              \
       uint64x2_t a_ = (a);                                              \
       uint64x2_t result;                                                \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10108 
10109 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_n_s16(int32x4_t a,int16x4_t b,int16_t c)10110 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
10111 {
10112   int32x4_t result;
10113   __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
10114            : "=w"(result)
10115            : "0"(a), "w"(b), "w"(c)
10116            : /* No clobbers */);
10117   return result;
10118 }
10119 
/* vmlal_n_s32: a(2x int64) += b(2x int32) * c, with the scalar c
   broadcast via the .s[0] by-element SMLAL form.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
10130 
10131 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_n_u16(uint32x4_t a,uint16x4_t b,uint16_t c)10132 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
10133 {
10134   uint32x4_t result;
10135   __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
10136            : "=w"(result)
10137            : "0"(a), "w"(b), "w"(c)
10138            : /* No clobbers */);
10139   return result;
10140 }
10141 
/* vmlal_n_u32: a(2x uint64) += b(2x uint32) * c, with the scalar c
   broadcast via the .s[0] by-element UMLAL form.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
10152 
10153 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlal_s8(int16x8_t a,int8x8_t b,int8x8_t c)10154 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
10155 {
10156   int16x8_t result;
10157   __asm__ ("smlal %0.8h,%2.8b,%3.8b"
10158            : "=w"(result)
10159            : "0"(a), "w"(b), "w"(c)
10160            : /* No clobbers */);
10161   return result;
10162 }
10163 
10164 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_s16(int32x4_t a,int16x4_t b,int16x4_t c)10165 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
10166 {
10167   int32x4_t result;
10168   __asm__ ("smlal %0.4s,%2.4h,%3.4h"
10169            : "=w"(result)
10170            : "0"(a), "w"(b), "w"(c)
10171            : /* No clobbers */);
10172   return result;
10173 }
10174 
10175 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_s32(int64x2_t a,int32x2_t b,int32x2_t c)10176 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
10177 {
10178   int64x2_t result;
10179   __asm__ ("smlal %0.2d,%2.2s,%3.2s"
10180            : "=w"(result)
10181            : "0"(a), "w"(b), "w"(c)
10182            : /* No clobbers */);
10183   return result;
10184 }
10185 
10186 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlal_u8(uint16x8_t a,uint8x8_t b,uint8x8_t c)10187 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
10188 {
10189   uint16x8_t result;
10190   __asm__ ("umlal %0.8h,%2.8b,%3.8b"
10191            : "=w"(result)
10192            : "0"(a), "w"(b), "w"(c)
10193            : /* No clobbers */);
10194   return result;
10195 }
10196 
10197 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_u16(uint32x4_t a,uint16x4_t b,uint16x4_t c)10198 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
10199 {
10200   uint32x4_t result;
10201   __asm__ ("umlal %0.4s,%2.4h,%3.4h"
10202            : "=w"(result)
10203            : "0"(a), "w"(b), "w"(c)
10204            : /* No clobbers */);
10205   return result;
10206 }
10207 
10208 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_u32(uint64x2_t a,uint32x2_t b,uint32x2_t c)10209 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
10210 {
10211   uint64x2_t result;
10212   __asm__ ("umlal %0.2d,%2.2s,%3.2s"
10213            : "=w"(result)
10214            : "0"(a), "w"(b), "w"(c)
10215            : /* No clobbers */);
10216   return result;
10217 }
10218 
/* vmlaq_lane_f32: a + b * c[d], as an unfused fmul+fadd pair (%1 is a
   scratch register for the product).  Statement-expression locals use
   the reserved "__" namespace so user macros cannot break this header.
   NOTE(review): ACLE declares the third argument of vmlaq_lane_f32 as
   float32x2_t; this version casts it to float32x4_t (laneq-style) —
   confirm against the spec before changing, as narrowing it would
   break existing callers of this header.  */
#define vmlaq_lane_f32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __c_ = (c);                                          \
       float32x4_t __b_ = (b);                                          \
       float32x4_t __a_ = (a);                                          \
       float32x4_t __result;                                            \
       float32x4_t __t1;                                                \
       __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fadd %0.4s, %0.4s, %1.4s" \
                : "=w"(__result), "=w"(__t1)                            \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10233 
/* vmlaq_lane_s16: a + b * c[d] on 8x16-bit lanes (MLA by element).
   Reserved-namespace locals guard against user macro capture.
   NOTE(review): ACLE declares the third argument as int16x4_t for the
   _lane_ (not _laneq_) variant; this version takes int16x8_t —
   confirm against the spec before changing.  */
#define vmlaq_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c_ = (c);                                            \
       int16x8_t __b_ = (b);                                            \
       int16x8_t __a_ = (a);                                            \
       int16x8_t __result;                                              \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10247 
/* vmlaq_lane_s32: a + b * c[d] on 4x32-bit lanes (MLA by element).
   Reserved-namespace locals guard against user macro capture.
   NOTE(review): ACLE declares the third argument as int32x2_t for the
   _lane_ variant; this version takes int32x4_t — confirm against the
   spec before changing.  */
#define vmlaq_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c_ = (c);                                            \
       int32x4_t __b_ = (b);                                            \
       int32x4_t __a_ = (a);                                            \
       int32x4_t __result;                                              \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10261 
/* vmlaq_lane_u16: a + b * c[d] on 8x16-bit unsigned lanes (MLA).
   Reserved-namespace locals guard against user macro capture.
   NOTE(review): ACLE declares the third argument as uint16x4_t for the
   _lane_ variant; this version takes uint16x8_t — confirm against the
   spec before changing.  */
#define vmlaq_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __c_ = (c);                                           \
       uint16x8_t __b_ = (b);                                           \
       uint16x8_t __a_ = (a);                                           \
       uint16x8_t __result;                                             \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10275 
/* vmlaq_lane_u32: a + b * c[d] on 4x32-bit unsigned lanes (MLA).
   Reserved-namespace locals guard against user macro capture.
   NOTE(review): ACLE declares the third argument as uint32x2_t for the
   _lane_ variant; this version takes uint32x4_t — confirm against the
   spec before changing.  */
#define vmlaq_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __c_ = (c);                                           \
       uint32x4_t __b_ = (b);                                           \
       uint32x4_t __a_ = (a);                                           \
       uint32x4_t __result;                                             \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10289 
/* vmlaq_laneq_s16: a + b * c[d] on 8x16-bit lanes, lane taken from a
   128-bit vector (MLA by element).  Reserved-namespace locals guard
   against user macro capture.  */
#define vmlaq_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c_ = (c);                                            \
       int16x8_t __b_ = (b);                                            \
       int16x8_t __a_ = (a);                                            \
       int16x8_t __result;                                              \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10303 
/* vmlaq_laneq_s32: a + b * c[d] on 4x32-bit lanes, lane taken from a
   128-bit vector (MLA by element).  Reserved-namespace locals guard
   against user macro capture.  */
#define vmlaq_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c_ = (c);                                            \
       int32x4_t __b_ = (b);                                            \
       int32x4_t __a_ = (a);                                            \
       int32x4_t __result;                                              \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10317 
/* vmlaq_laneq_u16: a + b * c[d] on 8x16-bit unsigned lanes, lane taken
   from a 128-bit vector (MLA).  Reserved-namespace locals guard
   against user macro capture.  */
#define vmlaq_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __c_ = (c);                                           \
       uint16x8_t __b_ = (b);                                           \
       uint16x8_t __a_ = (a);                                           \
       uint16x8_t __result;                                             \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10331 
/* vmlaq_laneq_u32: a + b * c[d] on 4x32-bit unsigned lanes, lane taken
   from a 128-bit vector (MLA).  Reserved-namespace locals guard
   against user macro capture.  */
#define vmlaq_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __c_ = (c);                                           \
       uint32x4_t __b_ = (b);                                           \
       uint32x4_t __a_ = (a);                                           \
       uint32x4_t __result;                                             \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10345 
10346 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_n_f32(float32x4_t a,float32x4_t b,float32_t c)10347 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
10348 {
10349   float32x4_t result;
10350   float32x4_t t1;
10351   __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
10352            : "=w"(result), "=w"(t1)
10353            : "0"(a), "w"(b), "w"(c)
10354            : /* No clobbers */);
10355   return result;
10356 }
10357 
10358 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmlaq_n_f64(float64x2_t a,float64x2_t b,float64_t c)10359 vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
10360 {
10361   float64x2_t result;
10362   float64x2_t t1;
10363   __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
10364            : "=w"(result), "=w"(t1)
10365            : "0"(a), "w"(b), "w"(c)
10366            : /* No clobbers */);
10367   return result;
10368 }
10369 
10370 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_n_s16(int16x8_t a,int16x8_t b,int16_t c)10371 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
10372 {
10373   int16x8_t result;
10374   __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
10375            : "=w"(result)
10376            : "0"(a), "w"(b), "w"(c)
10377            : /* No clobbers */);
10378   return result;
10379 }
10380 
10381 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_n_s32(int32x4_t a,int32x4_t b,int32_t c)10382 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
10383 {
10384   int32x4_t result;
10385   __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
10386            : "=w"(result)
10387            : "0"(a), "w"(b), "w"(c)
10388            : /* No clobbers */);
10389   return result;
10390 }
10391 
10392 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_n_u16(uint16x8_t a,uint16x8_t b,uint16_t c)10393 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
10394 {
10395   uint16x8_t result;
10396   __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
10397            : "=w"(result)
10398            : "0"(a), "w"(b), "w"(c)
10399            : /* No clobbers */);
10400   return result;
10401 }
10402 
10403 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_n_u32(uint32x4_t a,uint32x4_t b,uint32_t c)10404 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
10405 {
10406   uint32x4_t result;
10407   __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
10408            : "=w"(result)
10409            : "0"(a), "w"(b), "w"(c)
10410            : /* No clobbers */);
10411   return result;
10412 }
10413 
10414 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmlaq_s8(int8x16_t a,int8x16_t b,int8x16_t c)10415 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
10416 {
10417   int8x16_t result;
10418   __asm__ ("mla %0.16b, %2.16b, %3.16b"
10419            : "=w"(result)
10420            : "0"(a), "w"(b), "w"(c)
10421            : /* No clobbers */);
10422   return result;
10423 }
10424 
10425 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_s16(int16x8_t a,int16x8_t b,int16x8_t c)10426 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
10427 {
10428   int16x8_t result;
10429   __asm__ ("mla %0.8h, %2.8h, %3.8h"
10430            : "=w"(result)
10431            : "0"(a), "w"(b), "w"(c)
10432            : /* No clobbers */);
10433   return result;
10434 }
10435 
10436 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_s32(int32x4_t a,int32x4_t b,int32x4_t c)10437 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
10438 {
10439   int32x4_t result;
10440   __asm__ ("mla %0.4s, %2.4s, %3.4s"
10441            : "=w"(result)
10442            : "0"(a), "w"(b), "w"(c)
10443            : /* No clobbers */);
10444   return result;
10445 }
10446 
10447 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmlaq_u8(uint8x16_t a,uint8x16_t b,uint8x16_t c)10448 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
10449 {
10450   uint8x16_t result;
10451   __asm__ ("mla %0.16b, %2.16b, %3.16b"
10452            : "=w"(result)
10453            : "0"(a), "w"(b), "w"(c)
10454            : /* No clobbers */);
10455   return result;
10456 }
10457 
10458 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_u16(uint16x8_t a,uint16x8_t b,uint16x8_t c)10459 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
10460 {
10461   uint16x8_t result;
10462   __asm__ ("mla %0.8h, %2.8h, %3.8h"
10463            : "=w"(result)
10464            : "0"(a), "w"(b), "w"(c)
10465            : /* No clobbers */);
10466   return result;
10467 }
10468 
10469 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)10470 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
10471 {
10472   uint32x4_t result;
10473   __asm__ ("mla %0.4s, %2.4s, %3.4s"
10474            : "=w"(result)
10475            : "0"(a), "w"(b), "w"(c)
10476            : /* No clobbers */);
10477   return result;
10478 }
10479 
/* vmls_lane_f32: a - b * c[d], as an unfused fmul+fsub pair (%1 is a
   scratch register for the product).  Statement-expression locals use
   the reserved "__" namespace so user macros cannot break this header.  */
#define vmls_lane_f32(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t __c_ = (c);                                          \
       float32x2_t __b_ = (b);                                          \
       float32x2_t __a_ = (a);                                          \
       float32x2_t __result;                                            \
       float32x2_t __t1;                                                \
       __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fsub %0.2s, %0.2s, %1.2s" \
                : "=w"(__result), "=w"(__t1)                            \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10494 
/* vmls_lane_s16: a - b * c[d] on 4x16-bit lanes (MLS by element).
   Reserved-namespace locals guard against user macro capture.  */
#define vmls_lane_s16(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t __c_ = (c);                                            \
       int16x4_t __b_ = (b);                                            \
       int16x4_t __a_ = (a);                                            \
       int16x4_t __result;                                              \
       __asm__ ("mls %0.4h,%2.4h,%3.h[%4]"                              \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10508 
/* vmls_lane_s32: a - b * c[d] on 2x32-bit lanes (MLS by element).
   Reserved-namespace locals guard against user macro capture.  */
#define vmls_lane_s32(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t __c_ = (c);                                            \
       int32x2_t __b_ = (b);                                            \
       int32x2_t __a_ = (a);                                            \
       int32x2_t __result;                                              \
       __asm__ ("mls %0.2s,%2.2s,%3.s[%4]"                              \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10522 
/* vmls_lane_u16: a - b * c[d] on 4x16-bit unsigned lanes (MLS).
   Reserved-namespace locals guard against user macro capture.  */
#define vmls_lane_u16(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t __c_ = (c);                                           \
       uint16x4_t __b_ = (b);                                           \
       uint16x4_t __a_ = (a);                                           \
       uint16x4_t __result;                                             \
       __asm__ ("mls %0.4h,%2.4h,%3.h[%4]"                              \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10536 
/* vmls_lane_u32: a - b * c[d] on 2x32-bit unsigned lanes (MLS).
   Reserved-namespace locals guard against user macro capture.  */
#define vmls_lane_u32(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t __c_ = (c);                                           \
       uint32x2_t __b_ = (b);                                           \
       uint32x2_t __a_ = (a);                                           \
       uint32x2_t __result;                                             \
       __asm__ ("mls %0.2s,%2.2s,%3.s[%4]"                              \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10550 
10551 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmls_n_f32(float32x2_t a,float32x2_t b,float32_t c)10552 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
10553 {
10554   float32x2_t result;
10555   float32x2_t t1;
10556   __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
10557            : "=w"(result), "=w"(t1)
10558            : "0"(a), "w"(b), "w"(c)
10559            : /* No clobbers */);
10560   return result;
10561 }
10562 
10563 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmls_n_s16(int16x4_t a,int16x4_t b,int16_t c)10564 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
10565 {
10566   int16x4_t result;
10567   __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
10568            : "=w"(result)
10569            : "0"(a), "w"(b), "w"(c)
10570            : /* No clobbers */);
10571   return result;
10572 }
10573 
10574 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmls_n_s32(int32x2_t a,int32x2_t b,int32_t c)10575 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
10576 {
10577   int32x2_t result;
10578   __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
10579            : "=w"(result)
10580            : "0"(a), "w"(b), "w"(c)
10581            : /* No clobbers */);
10582   return result;
10583 }
10584 
10585 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmls_n_u16(uint16x4_t a,uint16x4_t b,uint16_t c)10586 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
10587 {
10588   uint16x4_t result;
10589   __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
10590            : "=w"(result)
10591            : "0"(a), "w"(b), "w"(c)
10592            : /* No clobbers */);
10593   return result;
10594 }
10595 
10596 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmls_n_u32(uint32x2_t a,uint32x2_t b,uint32_t c)10597 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
10598 {
10599   uint32x2_t result;
10600   __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
10601            : "=w"(result)
10602            : "0"(a), "w"(b), "w"(c)
10603            : /* No clobbers */);
10604   return result;
10605 }
10606 
10607 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmls_s8(int8x8_t a,int8x8_t b,int8x8_t c)10608 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
10609 {
10610   int8x8_t result;
10611   __asm__ ("mls %0.8b,%2.8b,%3.8b"
10612            : "=w"(result)
10613            : "0"(a), "w"(b), "w"(c)
10614            : /* No clobbers */);
10615   return result;
10616 }
10617 
10618 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmls_s16(int16x4_t a,int16x4_t b,int16x4_t c)10619 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
10620 {
10621   int16x4_t result;
10622   __asm__ ("mls %0.4h,%2.4h,%3.4h"
10623            : "=w"(result)
10624            : "0"(a), "w"(b), "w"(c)
10625            : /* No clobbers */);
10626   return result;
10627 }
10628 
10629 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmls_s32(int32x2_t a,int32x2_t b,int32x2_t c)10630 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
10631 {
10632   int32x2_t result;
10633   __asm__ ("mls %0.2s,%2.2s,%3.2s"
10634            : "=w"(result)
10635            : "0"(a), "w"(b), "w"(c)
10636            : /* No clobbers */);
10637   return result;
10638 }
10639 
10640 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmls_u8(uint8x8_t a,uint8x8_t b,uint8x8_t c)10641 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
10642 {
10643   uint8x8_t result;
10644   __asm__ ("mls %0.8b,%2.8b,%3.8b"
10645            : "=w"(result)
10646            : "0"(a), "w"(b), "w"(c)
10647            : /* No clobbers */);
10648   return result;
10649 }
10650 
10651 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmls_u16(uint16x4_t a,uint16x4_t b,uint16x4_t c)10652 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
10653 {
10654   uint16x4_t result;
10655   __asm__ ("mls %0.4h,%2.4h,%3.4h"
10656            : "=w"(result)
10657            : "0"(a), "w"(b), "w"(c)
10658            : /* No clobbers */);
10659   return result;
10660 }
10661 
10662 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmls_u32(uint32x2_t a,uint32x2_t b,uint32x2_t c)10663 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
10664 {
10665   uint32x2_t result;
10666   __asm__ ("mls %0.2s,%2.2s,%3.2s"
10667            : "=w"(result)
10668            : "0"(a), "w"(b), "w"(c)
10669            : /* No clobbers */);
10670   return result;
10671 }
10672 
/* vmlsl_high_lane_s16: widening multiply-subtract from the high half
   of b — a - (high 4 lanes of b) * c[d], widened to 32-bit (SMLSL2).
   Reserved-namespace locals guard against user macro capture.
   NOTE(review): ACLE declares the third argument as int16x4_t for the
   _lane_ (not _laneq_) variant; this version takes int16x8_t —
   confirm against the spec before changing.  */
#define vmlsl_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c_ = (c);                                            \
       int16x8_t __b_ = (b);                                            \
       int32x4_t __a_ = (a);                                            \
       int32x4_t __result;                                              \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10686 
/* vmlsl_high_lane_s32: widening multiply-subtract from the high half
   of b — a - (high 2 lanes of b) * c[d], widened to 64-bit (SMLSL2).
   Reserved-namespace locals guard against user macro capture.
   NOTE(review): ACLE declares the third argument as int32x2_t for the
   _lane_ variant; this version takes int32x4_t — confirm against the
   spec before changing.  */
#define vmlsl_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c_ = (c);                                            \
       int32x4_t __b_ = (b);                                            \
       int64x2_t __a_ = (a);                                            \
       int64x2_t __result;                                              \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10700 
/* vmlsl_high_lane_u16: unsigned widening multiply-subtract from the
   high half of b (UMLSL2).  Reserved-namespace locals guard against
   user macro capture.
   NOTE(review): ACLE declares the third argument as uint16x4_t for the
   _lane_ variant; this version takes uint16x8_t — confirm against the
   spec before changing.  */
#define vmlsl_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __c_ = (c);                                           \
       uint16x8_t __b_ = (b);                                           \
       uint32x4_t __a_ = (a);                                           \
       uint32x4_t __result;                                             \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10714 
/* vmlsl_high_lane_u32: unsigned widening multiply-subtract from the
   high half of b (UMLSL2).  Reserved-namespace locals guard against
   user macro capture.
   NOTE(review): ACLE declares the third argument as uint32x2_t for the
   _lane_ variant; this version takes uint32x4_t — confirm against the
   spec before changing.  */
#define vmlsl_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __c_ = (c);                                           \
       uint32x4_t __b_ = (b);                                           \
       uint64x2_t __a_ = (a);                                           \
       uint64x2_t __result;                                             \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10728 
/* vmlsl_high_laneq_s16: widening multiply-subtract from the high half
   of b, lane taken from a 128-bit vector (SMLSL2).
   Reserved-namespace locals guard against user macro capture.  */
#define vmlsl_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c_ = (c);                                            \
       int16x8_t __b_ = (b);                                            \
       int32x4_t __a_ = (a);                                            \
       int32x4_t __result;                                              \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10742 
/* vmlsl_high_laneq_s32: widening multiply-subtract from the high half
   of b, lane taken from a 128-bit vector (SMLSL2).
   Reserved-namespace locals guard against user macro capture.  */
#define vmlsl_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c_ = (c);                                            \
       int32x4_t __b_ = (b);                                            \
       int64x2_t __a_ = (a);                                            \
       int64x2_t __result;                                              \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(d)               \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10756 
10757 #define vmlsl_high_laneq_u16(a, b, c, d)                                \
10758   __extension__                                                         \
10759     ({                                                                  \
10760        uint16x8_t c_ = (c);                                             \
10761        uint16x8_t b_ = (b);                                             \
10762        uint32x4_t a_ = (a);                                             \
10763        uint32x4_t result;                                               \
10764        __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
10765                 : "=w"(result)                                          \
10766                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
10767                 : /* No clobbers */);                                   \
10768        result;                                                          \
10769      })
10770 
10771 #define vmlsl_high_laneq_u32(a, b, c, d)                                \
10772   __extension__                                                         \
10773     ({                                                                  \
10774        uint32x4_t c_ = (c);                                             \
10775        uint32x4_t b_ = (b);                                             \
10776        uint64x2_t a_ = (a);                                             \
10777        uint64x2_t result;                                               \
10778        __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
10779                 : "=w"(result)                                          \
10780                 : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
10781                 : /* No clobbers */);                                   \
10782        result;                                                          \
10783      })
10784 
10785 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_high_n_s16(int32x4_t a,int16x8_t b,int16_t c)10786 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
10787 {
10788   int32x4_t result;
10789   __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
10790            : "=w"(result)
10791            : "0"(a), "w"(b), "w"(c)
10792            : /* No clobbers */);
10793   return result;
10794 }
10795 
10796 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_high_n_s32(int64x2_t a,int32x4_t b,int32_t c)10797 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
10798 {
10799   int64x2_t result;
10800   __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
10801            : "=w"(result)
10802            : "0"(a), "w"(b), "w"(c)
10803            : /* No clobbers */);
10804   return result;
10805 }
10806 
10807 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_high_n_u16(uint32x4_t a,uint16x8_t b,uint16_t c)10808 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
10809 {
10810   uint32x4_t result;
10811   __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
10812            : "=w"(result)
10813            : "0"(a), "w"(b), "w"(c)
10814            : /* No clobbers */);
10815   return result;
10816 }
10817 
10818 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_high_n_u32(uint64x2_t a,uint32x4_t b,uint32_t c)10819 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
10820 {
10821   uint64x2_t result;
10822   __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
10823            : "=w"(result)
10824            : "0"(a), "w"(b), "w"(c)
10825            : /* No clobbers */);
10826   return result;
10827 }
10828 
10829 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsl_high_s8(int16x8_t a,int8x16_t b,int8x16_t c)10830 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
10831 {
10832   int16x8_t result;
10833   __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
10834            : "=w"(result)
10835            : "0"(a), "w"(b), "w"(c)
10836            : /* No clobbers */);
10837   return result;
10838 }
10839 
10840 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_high_s16(int32x4_t a,int16x8_t b,int16x8_t c)10841 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
10842 {
10843   int32x4_t result;
10844   __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
10845            : "=w"(result)
10846            : "0"(a), "w"(b), "w"(c)
10847            : /* No clobbers */);
10848   return result;
10849 }
10850 
10851 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_high_s32(int64x2_t a,int32x4_t b,int32x4_t c)10852 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
10853 {
10854   int64x2_t result;
10855   __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
10856            : "=w"(result)
10857            : "0"(a), "w"(b), "w"(c)
10858            : /* No clobbers */);
10859   return result;
10860 }
10861 
10862 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsl_high_u8(uint16x8_t a,uint8x16_t b,uint8x16_t c)10863 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
10864 {
10865   uint16x8_t result;
10866   __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
10867            : "=w"(result)
10868            : "0"(a), "w"(b), "w"(c)
10869            : /* No clobbers */);
10870   return result;
10871 }
10872 
10873 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_high_u16(uint32x4_t a,uint16x8_t b,uint16x8_t c)10874 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
10875 {
10876   uint32x4_t result;
10877   __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
10878            : "=w"(result)
10879            : "0"(a), "w"(b), "w"(c)
10880            : /* No clobbers */);
10881   return result;
10882 }
10883 
10884 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_high_u32(uint64x2_t a,uint32x4_t b,uint32x4_t c)10885 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
10886 {
10887   uint64x2_t result;
10888   __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
10889            : "=w"(result)
10890            : "0"(a), "w"(b), "w"(c)
10891            : /* No clobbers */);
10892   return result;
10893 }
10894 
/* vmlsl_lane_{s16,s32,u16,u32}: widening multiply-subtract by lane D of
   the 64-bit vector C: A - (B * C[D]).  Locals use reserved names so user
   identifiers in the macro arguments cannot be captured.  */
#define vmlsl_lane_s16(__a, __b, __c, __d)                              \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t __c_ = (__c);                                          \
       int16x4_t __b_ = (__b);                                          \
       int32x4_t __a_ = (__a);                                          \
       int32x4_t __result;                                              \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_lane_s32(__a, __b, __c, __d)                              \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t __c_ = (__c);                                          \
       int32x2_t __b_ = (__b);                                          \
       int64x2_t __a_ = (__a);                                          \
       int64x2_t __result;                                              \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_lane_u16(__a, __b, __c, __d)                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t __c_ = (__c);                                         \
       uint16x4_t __b_ = (__b);                                         \
       uint32x4_t __a_ = (__a);                                         \
       uint32x4_t __result;                                             \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_lane_u32(__a, __b, __c, __d)                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t __c_ = (__c);                                         \
       uint32x2_t __b_ = (__b);                                         \
       uint64x2_t __a_ = (__a);                                         \
       uint64x2_t __result;                                             \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
10950 
/* vmlsl_laneq_{s16,s32,u16,u32}: widening multiply-subtract by lane D of
   the 128-bit vector C: A - (B * C[D]).  Locals use reserved names so user
   identifiers in the macro arguments cannot be captured.  */
#define vmlsl_laneq_s16(__a, __b, __c, __d)                             \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c_ = (__c);                                          \
       int16x4_t __b_ = (__b);                                          \
       int32x4_t __a_ = (__a);                                          \
       int32x4_t __result;                                              \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_laneq_s32(__a, __b, __c, __d)                             \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c_ = (__c);                                          \
       int32x2_t __b_ = (__b);                                          \
       int64x2_t __a_ = (__a);                                          \
       int64x2_t __result;                                              \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_laneq_u16(__a, __b, __c, __d)                             \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __c_ = (__c);                                         \
       uint16x4_t __b_ = (__b);                                         \
       uint32x4_t __a_ = (__a);                                         \
       uint32x4_t __result;                                             \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_laneq_u32(__a, __b, __c, __d)                             \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __c_ = (__c);                                         \
       uint32x2_t __b_ = (__b);                                         \
       uint64x2_t __a_ = (__a);                                         \
       uint64x2_t __result;                                             \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
11006 
11007 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_n_s16(int32x4_t a,int16x4_t b,int16_t c)11008 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
11009 {
11010   int32x4_t result;
11011   __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
11012            : "=w"(result)
11013            : "0"(a), "w"(b), "w"(c)
11014            : /* No clobbers */);
11015   return result;
11016 }
11017 
11018 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_n_s32(int64x2_t a,int32x2_t b,int32_t c)11019 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
11020 {
11021   int64x2_t result;
11022   __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
11023            : "=w"(result)
11024            : "0"(a), "w"(b), "w"(c)
11025            : /* No clobbers */);
11026   return result;
11027 }
11028 
11029 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_n_u16(uint32x4_t a,uint16x4_t b,uint16_t c)11030 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
11031 {
11032   uint32x4_t result;
11033   __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
11034            : "=w"(result)
11035            : "0"(a), "w"(b), "w"(c)
11036            : /* No clobbers */);
11037   return result;
11038 }
11039 
11040 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_n_u32(uint64x2_t a,uint32x2_t b,uint32_t c)11041 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
11042 {
11043   uint64x2_t result;
11044   __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
11045            : "=w"(result)
11046            : "0"(a), "w"(b), "w"(c)
11047            : /* No clobbers */);
11048   return result;
11049 }
11050 
11051 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsl_s8(int16x8_t a,int8x8_t b,int8x8_t c)11052 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
11053 {
11054   int16x8_t result;
11055   __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
11056            : "=w"(result)
11057            : "0"(a), "w"(b), "w"(c)
11058            : /* No clobbers */);
11059   return result;
11060 }
11061 
11062 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_s16(int32x4_t a,int16x4_t b,int16x4_t c)11063 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
11064 {
11065   int32x4_t result;
11066   __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
11067            : "=w"(result)
11068            : "0"(a), "w"(b), "w"(c)
11069            : /* No clobbers */);
11070   return result;
11071 }
11072 
11073 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_s32(int64x2_t a,int32x2_t b,int32x2_t c)11074 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
11075 {
11076   int64x2_t result;
11077   __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
11078            : "=w"(result)
11079            : "0"(a), "w"(b), "w"(c)
11080            : /* No clobbers */);
11081   return result;
11082 }
11083 
11084 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsl_u8(uint16x8_t a,uint8x8_t b,uint8x8_t c)11085 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
11086 {
11087   uint16x8_t result;
11088   __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
11089            : "=w"(result)
11090            : "0"(a), "w"(b), "w"(c)
11091            : /* No clobbers */);
11092   return result;
11093 }
11094 
11095 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_u16(uint32x4_t a,uint16x4_t b,uint16x4_t c)11096 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
11097 {
11098   uint32x4_t result;
11099   __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
11100            : "=w"(result)
11101            : "0"(a), "w"(b), "w"(c)
11102            : /* No clobbers */);
11103   return result;
11104 }
11105 
11106 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_u32(uint64x2_t a,uint32x2_t b,uint32x2_t c)11107 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
11108 {
11109   uint64x2_t result;
11110   __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
11111            : "=w"(result)
11112            : "0"(a), "w"(b), "w"(c)
11113            : /* No clobbers */);
11114   return result;
11115 }
11116 
/* vmlsq_lane_{f32,s16,s32,u16,u32}: quad multiply-subtract by lane D of C:
   A - (B * C[D]).  Locals use reserved names so user identifiers in the
   macro arguments cannot be captured.
   NOTE(review): ACLE defines the _lane variants with a 64-bit lane vector
   (e.g. int16x4_t for vmlsq_lane_s16); these take 128-bit vectors --
   confirm against callers before changing the types.  */
#define vmlsq_lane_f32(__a, __b, __c, __d)                              \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __c_ = (__c);                                        \
       float32x4_t __b_ = (__b);                                        \
       float32x4_t __a_ = (__a);                                        \
       float32x4_t __result;                                            \
       float32x4_t __t1;                                                \
       __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
                : "=w"(__result), "=w"(__t1)                            \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsq_lane_s16(__a, __b, __c, __d)                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c_ = (__c);                                          \
       int16x8_t __b_ = (__b);                                          \
       int16x8_t __a_ = (__a);                                          \
       int16x8_t __result;                                              \
       __asm__ ("mls %0.8h,%2.8h,%3.h[%4]"                              \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsq_lane_s32(__a, __b, __c, __d)                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c_ = (__c);                                          \
       int32x4_t __b_ = (__b);                                          \
       int32x4_t __a_ = (__a);                                          \
       int32x4_t __result;                                              \
       __asm__ ("mls %0.4s,%2.4s,%3.s[%4]"                              \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsq_lane_u16(__a, __b, __c, __d)                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __c_ = (__c);                                         \
       uint16x8_t __b_ = (__b);                                         \
       uint16x8_t __a_ = (__a);                                         \
       uint16x8_t __result;                                             \
       __asm__ ("mls %0.8h,%2.8h,%3.h[%4]"                              \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsq_lane_u32(__a, __b, __c, __d)                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __c_ = (__c);                                         \
       uint32x4_t __b_ = (__b);                                         \
       uint32x4_t __a_ = (__a);                                         \
       uint32x4_t __result;                                             \
       __asm__ ("mls %0.4s,%2.4s,%3.s[%4]"                              \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
11187 
/* vmlsq_laneq_{f32,s16,s32,u16,u32}: quad multiply-subtract by lane __d of
   the 128-bit vector __c: __a - (__b * __c[__d]).  Continuations are
   space-aligned to match the neighbouring macros in this file.  */
#define vmlsq_laneq_f32(__a, __b, __c, __d)                             \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __c_ = (__c);                                        \
       float32x4_t __b_ = (__b);                                        \
       float32x4_t __a_ = (__a);                                        \
       float32x4_t __result;                                            \
       float32x4_t __t1;                                                \
       __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
                : "=w"(__result), "=w"(__t1)                            \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsq_laneq_s16(__a, __b, __c, __d)                             \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c_ = (__c);                                          \
       int16x8_t __b_ = (__b);                                          \
       int16x8_t __a_ = (__a);                                          \
       int16x8_t __result;                                              \
       __asm__ ("mls %0.8h, %2.8h, %3.h[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsq_laneq_s32(__a, __b, __c, __d)                             \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c_ = (__c);                                          \
       int32x4_t __b_ = (__b);                                          \
       int32x4_t __a_ = (__a);                                          \
       int32x4_t __result;                                              \
       __asm__ ("mls %0.4s, %2.4s, %3.s[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsq_laneq_u16(__a, __b, __c, __d)                             \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __c_ = (__c);                                         \
       uint16x8_t __b_ = (__b);                                         \
       uint16x8_t __a_ = (__a);                                         \
       uint16x8_t __result;                                             \
       __asm__ ("mls %0.8h, %2.8h, %3.h[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsq_laneq_u32(__a, __b, __c, __d)                             \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __c_ = (__c);                                         \
       uint32x4_t __b_ = (__b);                                         \
       uint32x4_t __a_ = (__a);                                         \
       uint32x4_t __result;                                             \
       __asm__ ("mls %0.4s, %2.4s, %3.s[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)             \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
11258 
11259 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_n_f32(float32x4_t a,float32x4_t b,float32_t c)11260 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
11261 {
11262   float32x4_t result;
11263   float32x4_t t1;
11264   __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
11265            : "=w"(result), "=w"(t1)
11266            : "0"(a), "w"(b), "w"(c)
11267            : /* No clobbers */);
11268   return result;
11269 }
11270 
11271 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmlsq_n_f64(float64x2_t a,float64x2_t b,float64_t c)11272 vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
11273 {
11274   float64x2_t result;
11275   float64x2_t t1;
11276   __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
11277            : "=w"(result), "=w"(t1)
11278            : "0"(a), "w"(b), "w"(c)
11279            : /* No clobbers */);
11280   return result;
11281 }
11282 
11283 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsq_n_s16(int16x8_t a,int16x8_t b,int16_t c)11284 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
11285 {
11286   int16x8_t result;
11287   __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
11288            : "=w"(result)
11289            : "0"(a), "w"(b), "w"(c)
11290            : /* No clobbers */);
11291   return result;
11292 }
11293 
11294 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsq_n_s32(int32x4_t a,int32x4_t b,int32_t c)11295 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
11296 {
11297   int32x4_t result;
11298   __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
11299            : "=w"(result)
11300            : "0"(a), "w"(b), "w"(c)
11301            : /* No clobbers */);
11302   return result;
11303 }
11304 
11305 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsq_n_u16(uint16x8_t a,uint16x8_t b,uint16_t c)11306 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
11307 {
11308   uint16x8_t result;
11309   __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
11310            : "=w"(result)
11311            : "0"(a), "w"(b), "w"(c)
11312            : /* No clobbers */);
11313   return result;
11314 }
11315 
11316 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsq_n_u32(uint32x4_t a,uint32x4_t b,uint32_t c)11317 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
11318 {
11319   uint32x4_t result;
11320   __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
11321            : "=w"(result)
11322            : "0"(a), "w"(b), "w"(c)
11323            : /* No clobbers */);
11324   return result;
11325 }
11326 
11327 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmlsq_s8(int8x16_t a,int8x16_t b,int8x16_t c)11328 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
11329 {
11330   int8x16_t result;
11331   __asm__ ("mls %0.16b,%2.16b,%3.16b"
11332            : "=w"(result)
11333            : "0"(a), "w"(b), "w"(c)
11334            : /* No clobbers */);
11335   return result;
11336 }
11337 
11338 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsq_s16(int16x8_t a,int16x8_t b,int16x8_t c)11339 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
11340 {
11341   int16x8_t result;
11342   __asm__ ("mls %0.8h,%2.8h,%3.8h"
11343            : "=w"(result)
11344            : "0"(a), "w"(b), "w"(c)
11345            : /* No clobbers */);
11346   return result;
11347 }
11348 
11349 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsq_s32(int32x4_t a,int32x4_t b,int32x4_t c)11350 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
11351 {
11352   int32x4_t result;
11353   __asm__ ("mls %0.4s,%2.4s,%3.4s"
11354            : "=w"(result)
11355            : "0"(a), "w"(b), "w"(c)
11356            : /* No clobbers */);
11357   return result;
11358 }
11359 
11360 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmlsq_u8(uint8x16_t a,uint8x16_t b,uint8x16_t c)11361 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
11362 {
11363   uint8x16_t result;
11364   __asm__ ("mls %0.16b,%2.16b,%3.16b"
11365            : "=w"(result)
11366            : "0"(a), "w"(b), "w"(c)
11367            : /* No clobbers */);
11368   return result;
11369 }
11370 
11371 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsq_u16(uint16x8_t a,uint16x8_t b,uint16x8_t c)11372 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
11373 {
11374   uint16x8_t result;
11375   __asm__ ("mls %0.8h,%2.8h,%3.8h"
11376            : "=w"(result)
11377            : "0"(a), "w"(b), "w"(c)
11378            : /* No clobbers */);
11379   return result;
11380 }
11381 
11382 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)11383 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
11384 {
11385   uint32x4_t result;
11386   __asm__ ("mls %0.4s,%2.4s,%3.4s"
11387            : "=w"(result)
11388            : "0"(a), "w"(b), "w"(c)
11389            : /* No clobbers */);
11390   return result;
11391 }
11392 
/* vmov_n_f32: broadcast the scalar A to both lanes of a 64-bit vector (DUP).
   NOTE(review): A is passed in a general register ("r"/%w1) even though it is
   a float; DUP from a GP register reuses the raw 32-bit pattern — confirm the
   ABI passes float32_t in a GP reg here or that GCC handles the move.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmov_n_f32 (float32_t a)
{
  float32x2_t result;
  __asm__ ("dup %0.2s, %w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmov_n_p8: broadcast the low 8 bits of A to all 8 lanes (DUP).  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmov_n_p8 (uint32_t a)
{
  poly8x8_t result;
  __asm__ ("dup %0.8b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmov_n_p16: broadcast the low 16 bits of A to all 4 lanes (DUP).  */
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vmov_n_p16 (uint32_t a)
{
  poly16x4_t result;
  __asm__ ("dup %0.4h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
11425 
/* vmov_n_s8: broadcast the low 8 bits of A to all 8 lanes (DUP).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmov_n_s8 (int32_t a)
{
  int8x8_t result;
  __asm__ ("dup %0.8b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmov_n_s16: broadcast the low 16 bits of A to all 4 lanes (DUP).  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmov_n_s16 (int32_t a)
{
  int16x4_t result;
  __asm__ ("dup %0.4h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmov_n_s32: broadcast A to both 32-bit lanes (DUP).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmov_n_s32 (int32_t a)
{
  int32x2_t result;
  __asm__ ("dup %0.2s,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
11458 
/* vmov_n_s64: move the 64-bit scalar A into a one-element vector.
   INS writes only element 0; any upper bits of the SIMD register are left
   unwritten, which is harmless here because int64x1_t is a 64-bit scalar
   typedef occupying just the D register.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vmov_n_s64 (int64_t a)
{
  int64x1_t result;
  __asm__ ("ins %0.d[0],%x1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
11469 
/* vmov_n_u8: broadcast the low 8 bits of A to all 8 lanes (DUP).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmov_n_u8 (uint32_t a)
{
  uint8x8_t result;
  __asm__ ("dup %0.8b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmov_n_u16: broadcast the low 16 bits of A to all 4 lanes (DUP).  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmov_n_u16 (uint32_t a)
{
  uint16x4_t result;
  __asm__ ("dup %0.4h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmov_n_u32: broadcast A to both 32-bit lanes (DUP).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmov_n_u32 (uint32_t a)
{
  uint32x2_t result;
  __asm__ ("dup %0.2s,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
11502 
/* vmov_n_u64: move the 64-bit scalar A into a one-element vector.
   See vmov_n_s64 — INS writes element 0 only, which covers the whole
   64-bit uint64x1_t result.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vmov_n_u64 (uint64_t a)
{
  uint64x1_t result;
  __asm__ ("ins %0.d[0],%x1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
11513 
/* vmovl_high_s8: sign-extend the upper 8 lanes of A to 16 bits
   (SSHLL2 with shift #0 is a widening move).  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovl_high_s8 (int8x16_t a)
{
  int16x8_t result;
  __asm__ ("sshll2 %0.8h,%1.16b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vmovl_high_s16: sign-extend the upper 4 lanes of A to 32 bits.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovl_high_s16 (int16x8_t a)
{
  int32x4_t result;
  __asm__ ("sshll2 %0.4s,%1.8h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vmovl_high_s32: sign-extend the upper 2 lanes of A to 64 bits.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmovl_high_s32 (int32x4_t a)
{
  int64x2_t result;
  __asm__ ("sshll2 %0.2d,%1.4s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
11546 
/* vmovl_high_u8: zero-extend the upper 8 lanes of A to 16 bits
   (USHLL2 with shift #0 is a widening move).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovl_high_u8 (uint8x16_t a)
{
  uint16x8_t result;
  __asm__ ("ushll2 %0.8h,%1.16b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vmovl_high_u16: zero-extend the upper 4 lanes of A to 32 bits.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovl_high_u16 (uint16x8_t a)
{
  uint32x4_t result;
  __asm__ ("ushll2 %0.4s,%1.8h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vmovl_high_u32: zero-extend the upper 2 lanes of A to 64 bits.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmovl_high_u32 (uint32x4_t a)
{
  uint64x2_t result;
  __asm__ ("ushll2 %0.2d,%1.4s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
11579 
/* vmovl_s8: sign-extend each 8-bit lane of A to 16 bits
   (SSHLL with shift #0 is a widening move).  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovl_s8 (int8x8_t a)
{
  int16x8_t result;
  __asm__ ("sshll %0.8h,%1.8b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vmovl_s16: sign-extend each 16-bit lane of A to 32 bits.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovl_s16 (int16x4_t a)
{
  int32x4_t result;
  __asm__ ("sshll %0.4s,%1.4h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vmovl_s32: sign-extend each 32-bit lane of A to 64 bits.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmovl_s32 (int32x2_t a)
{
  int64x2_t result;
  __asm__ ("sshll %0.2d,%1.2s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
11612 
/* vmovl_u8: zero-extend each 8-bit lane of A to 16 bits
   (USHLL with shift #0 is a widening move).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovl_u8 (uint8x8_t a)
{
  uint16x8_t result;
  __asm__ ("ushll %0.8h,%1.8b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vmovl_u16: zero-extend each 16-bit lane of A to 32 bits.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovl_u16 (uint16x4_t a)
{
  uint32x4_t result;
  __asm__ ("ushll %0.4s,%1.4h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vmovl_u32: zero-extend each 32-bit lane of A to 64 bits.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmovl_u32 (uint32x2_t a)
{
  uint64x2_t result;
  __asm__ ("ushll %0.2d,%1.2s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
11645 
/* vmovn_high_s16: narrow each 16-bit lane of B to 8 bits and place the
   result in the upper half of the return value; the lower half is A.
   The "+w" constraint makes the XTN2 destination (pre-loaded with A via
   vcombine) both input and output.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
  __asm__ ("xtn2 %0.16b,%1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* vmovn_high_s32: narrow each 32-bit lane of B to 16 bits into the upper
   half; lower half is A.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
  __asm__ ("xtn2 %0.8h,%1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* vmovn_high_s64: narrow each 64-bit lane of B to 32 bits into the upper
   half; lower half is A.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
  __asm__ ("xtn2 %0.4s,%1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
11678 
/* vmovn_high_u16: narrow each 16-bit lane of B to 8 bits into the upper
   half of the result; the lower half is A (see vmovn_high_s16).  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("xtn2 %0.16b,%1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* vmovn_high_u32: narrow each 32-bit lane of B to 16 bits into the upper
   half; lower half is A.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
  __asm__ ("xtn2 %0.8h,%1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* vmovn_high_u64: narrow each 64-bit lane of B to 32 bits into the upper
   half; lower half is A.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
  __asm__ ("xtn2 %0.4s,%1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
11711 
/* vmovn_s16: narrow each 16-bit lane of A to its low 8 bits (XTN).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmovn_s16 (int16x8_t a)
{
  int8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vmovn_s32: narrow each 32-bit lane of A to its low 16 bits (XTN).  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmovn_s32 (int32x4_t a)
{
  int16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vmovn_s64: narrow each 64-bit lane of A to its low 32 bits (XTN).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmovn_s64 (int64x2_t a)
{
  int32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
11744 
/* vmovn_u16: narrow each 16-bit lane of A to its low 8 bits (XTN).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmovn_u16 (uint16x8_t a)
{
  uint8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vmovn_u32: narrow each 32-bit lane of A to its low 16 bits (XTN).  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmovn_u32 (uint32x4_t a)
{
  uint16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vmovn_u64: narrow each 64-bit lane of A to its low 32 bits (XTN).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmovn_u64 (uint64x2_t a)
{
  uint32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
11777 
/* vmovq_n_f32: broadcast the scalar A to all four lanes (DUP).
   NOTE(review): as with vmov_n_f32, the float is taken via an "r"
   (general-register) constraint — confirm the calling convention.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmovq_n_f32 (float32_t a)
{
  float32x4_t result;
  __asm__ ("dup %0.4s, %w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmovq_n_f64: broadcast the scalar A to both lanes.  Implemented as a
   plain vector initializer — no asm needed; the compiler picks the move.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmovq_n_f64 (float64_t a)
{
  return (float64x2_t) {a, a};
}
11794 
/* vmovq_n_p8: broadcast the low 8 bits of A to all 16 lanes (DUP).  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmovq_n_p8 (uint32_t a)
{
  poly8x16_t result;
  __asm__ ("dup %0.16b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmovq_n_p16: broadcast the low 16 bits of A to all 8 lanes (DUP).  */
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmovq_n_p16 (uint32_t a)
{
  poly16x8_t result;
  __asm__ ("dup %0.8h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmovq_n_s8: broadcast the low 8 bits of A to all 16 lanes (DUP).  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmovq_n_s8 (int32_t a)
{
  int8x16_t result;
  __asm__ ("dup %0.16b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
11827 
/* vmovq_n_s16: broadcast the low 16 bits of A to all 8 lanes (DUP).  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovq_n_s16 (int32_t a)
{
  int16x8_t result;
  __asm__ ("dup %0.8h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmovq_n_s32: broadcast A to all 4 lanes (DUP).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovq_n_s32 (int32_t a)
{
  int32x4_t result;
  __asm__ ("dup %0.4s,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmovq_n_s64: broadcast the 64-bit scalar A to both lanes (DUP, %x1 is
   the full 64-bit general register).  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmovq_n_s64 (int64_t a)
{
  int64x2_t result;
  __asm__ ("dup %0.2d,%x1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
11860 
/* vmovq_n_u8: broadcast the low 8 bits of A to all 16 lanes (DUP).  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmovq_n_u8 (uint32_t a)
{
  uint8x16_t result;
  __asm__ ("dup %0.16b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmovq_n_u16: broadcast the low 16 bits of A to all 8 lanes (DUP).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovq_n_u16 (uint32_t a)
{
  uint16x8_t result;
  __asm__ ("dup %0.8h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmovq_n_u32: broadcast A to all 4 lanes (DUP).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovq_n_u32 (uint32_t a)
{
  uint32x4_t result;
  __asm__ ("dup %0.4s,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

/* vmovq_n_u64: broadcast the 64-bit scalar A to both lanes (DUP).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmovq_n_u64 (uint64_t a)
{
  uint64x2_t result;
  __asm__ ("dup %0.2d,%x1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
11904 
/* vmul_lane_*: multiply every lane of A by lane C of the 64-bit vector B.
   These are macros (not functions) because C must be a compile-time
   constant to satisfy the "i" (immediate) asm constraint.  B is evaluated
   into b_ first so each argument is expanded exactly once.  */

#define vmul_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       __asm__ ("fmul %0.2s,%1.2s,%2.s[%3]"                             \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("mul %0.4h,%1.4h,%2.h[%3]"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("mul %0.2s,%1.2s,%2.s[%3]"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("mul %0.4h,%1.4h,%2.h[%3]"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
11969 
/* vmul_laneq_*: like vmul_lane_* but the lane operand B is a 128-bit
   ("q") vector, so C may index any of its lanes.  */

#define vmul_laneq_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       __asm__ ("fmul %0.2s, %1.2s, %2.s[%3]"                           \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_laneq_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("mul %0.4h, %1.4h, %2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_laneq_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_laneq_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("mul %0.4h, %1.4h, %2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_laneq_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
12034 
/* vmul_n_f32: multiply each lane of A by the scalar B.  B is placed in a
   SIMD register ("w") and element 0 is used as the multiplier.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_n_f32 (float32x2_t a, float32_t b)
{
  float32x2_t result;
  __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vmul_n_s16: multiply each 16-bit lane of A by the scalar B.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vmul_n_s32: multiply each 32-bit lane of A by the scalar B.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vmul_n_u16: multiply each 16-bit lane of A by the scalar B.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_n_u16 (uint16x4_t a, uint16_t b)
{
  uint16x4_t result;
  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vmul_n_u32: multiply each 32-bit lane of A by the scalar B.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_n_u32 (uint32x2_t a, uint32_t b)
{
  uint32x2_t result;
  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
12089 
/* vmuld_lane_f64: scalar double multiply, A * B[C].
   NOTE(review): the lane operand is taken as float64x2_t, which is the
   _laneq shape; ACLE's vmuld_lane_f64 takes a float64x1_t.  Since
   float64x1_t is a plain double typedef in this header (no lane indexing
   possible), this deviation may be deliberate — confirm against the
   current ACLE specification before changing.  */
#define vmuld_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64_t result;                                                \
       __asm__ ("fmul %d0,%d1,%2.d[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
12102 
/* vmull_high_lane_*: widening multiply of the high half of A by lane C of
   B.  Per ACLE, the non-"q" _lane variants take a 64-bit vector for the
   lane operand (the _laneq variants below take the 128-bit form); the
   previous definitions wrongly declared the temporary with the 128-bit
   type, which rejected ACLE-conforming arguments.  Fixed to use the
   64-bit vector types.  The asm lane reference %2.h[%3]/%2.s[%3] is
   unchanged: the D register's lanes alias the low half of its Q register,
   so lane indices within the 64-bit vector address the same elements.  */

#define vmull_high_lane_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
12154 
12155 #define vmull_high_laneq_s16(a, b, c)                                   \
12156   __extension__                                                         \
12157     ({                                                                  \
12158        int16x8_t b_ = (b);                                              \
12159        int16x8_t a_ = (a);                                              \
12160        int32x4_t result;                                                \
12161        __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
12162                 : "=w"(result)                                          \
12163                 : "w"(a_), "w"(b_), "i"(c)                              \
12164                 : /* No clobbers */);                                   \
12165        result;                                                          \
12166      })
12167 
/* Signed widening multiply (smull2) of the high half of A by element C
   of the 128-bit vector B.  */
#define vmull_high_laneq_s32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __b = (b);                                             \
       int32x4_t __a = (a);                                             \
       int64x2_t __result;                                              \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12180 
/* Unsigned widening multiply (umull2) of the high half of A by element
   C of the 128-bit vector B.  */
#define vmull_high_laneq_u16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __b = (b);                                            \
       uint16x8_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12193 
/* Unsigned widening multiply (umull2) of the high half of A by element
   C of the 128-bit vector B.  */
#define vmull_high_laneq_u32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __b = (b);                                            \
       uint32x4_t __a = (a);                                            \
       uint64x2_t __result;                                             \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12206 
12207 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_high_n_s16(int16x8_t a,int16_t b)12208 vmull_high_n_s16 (int16x8_t a, int16_t b)
12209 {
12210   int32x4_t result;
12211   __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
12212            : "=w"(result)
12213            : "w"(a), "w"(b)
12214            : /* No clobbers */);
12215   return result;
12216 }
12217 
12218 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_high_n_s32(int32x4_t a,int32_t b)12219 vmull_high_n_s32 (int32x4_t a, int32_t b)
12220 {
12221   int64x2_t result;
12222   __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
12223            : "=w"(result)
12224            : "w"(a), "w"(b)
12225            : /* No clobbers */);
12226   return result;
12227 }
12228 
12229 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_high_n_u16(uint16x8_t a,uint16_t b)12230 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
12231 {
12232   uint32x4_t result;
12233   __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
12234            : "=w"(result)
12235            : "w"(a), "w"(b)
12236            : /* No clobbers */);
12237   return result;
12238 }
12239 
12240 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_high_n_u32(uint32x4_t a,uint32_t b)12241 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
12242 {
12243   uint64x2_t result;
12244   __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
12245            : "=w"(result)
12246            : "w"(a), "w"(b)
12247            : /* No clobbers */);
12248   return result;
12249 }
12250 
12251 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmull_high_p8(poly8x16_t a,poly8x16_t b)12252 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
12253 {
12254   poly16x8_t result;
12255   __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
12256            : "=w"(result)
12257            : "w"(a), "w"(b)
12258            : /* No clobbers */);
12259   return result;
12260 }
12261 
12262 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmull_high_s8(int8x16_t a,int8x16_t b)12263 vmull_high_s8 (int8x16_t a, int8x16_t b)
12264 {
12265   int16x8_t result;
12266   __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
12267            : "=w"(result)
12268            : "w"(a), "w"(b)
12269            : /* No clobbers */);
12270   return result;
12271 }
12272 
12273 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_high_s16(int16x8_t a,int16x8_t b)12274 vmull_high_s16 (int16x8_t a, int16x8_t b)
12275 {
12276   int32x4_t result;
12277   __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
12278            : "=w"(result)
12279            : "w"(a), "w"(b)
12280            : /* No clobbers */);
12281   return result;
12282 }
12283 
12284 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_high_s32(int32x4_t a,int32x4_t b)12285 vmull_high_s32 (int32x4_t a, int32x4_t b)
12286 {
12287   int64x2_t result;
12288   __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
12289            : "=w"(result)
12290            : "w"(a), "w"(b)
12291            : /* No clobbers */);
12292   return result;
12293 }
12294 
12295 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmull_high_u8(uint8x16_t a,uint8x16_t b)12296 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
12297 {
12298   uint16x8_t result;
12299   __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
12300            : "=w"(result)
12301            : "w"(a), "w"(b)
12302            : /* No clobbers */);
12303   return result;
12304 }
12305 
12306 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_high_u16(uint16x8_t a,uint16x8_t b)12307 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
12308 {
12309   uint32x4_t result;
12310   __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
12311            : "=w"(result)
12312            : "w"(a), "w"(b)
12313            : /* No clobbers */);
12314   return result;
12315 }
12316 
12317 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_high_u32(uint32x4_t a,uint32x4_t b)12318 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
12319 {
12320   uint64x2_t result;
12321   __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
12322            : "=w"(result)
12323            : "w"(a), "w"(b)
12324            : /* No clobbers */);
12325   return result;
12326 }
12327 
/* Signed widening multiply (smull) of A by element C of the 64-bit
   vector B.  */
#define vmull_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t __b = (b);                                             \
       int16x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12340 
/* Signed widening multiply (smull) of A by element C of the 64-bit
   vector B.  */
#define vmull_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t __b = (b);                                             \
       int32x2_t __a = (a);                                             \
       int64x2_t __result;                                              \
       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                            \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12353 
/* Unsigned widening multiply (umull) of A by element C of the 64-bit
   vector B.  */
#define vmull_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t __b = (b);                                            \
       uint16x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12366 
/* Unsigned widening multiply (umull) of A by element C of the 64-bit
   vector B.  */
#define vmull_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t __b = (b);                                            \
       uint32x2_t __a = (a);                                            \
       uint64x2_t __result;                                             \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12379 
/* Signed widening multiply (smull) of A by element C of the 128-bit
   vector B.  */
#define vmull_laneq_s16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __b = (b);                                             \
       int16x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("smull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12392 
/* Signed widening multiply (smull) of A by element C of the 128-bit
   vector B.  */
#define vmull_laneq_s32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __b = (b);                                             \
       int32x2_t __a = (a);                                             \
       int64x2_t __result;                                              \
       __asm__ ("smull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12405 
/* Unsigned widening multiply (umull) of A by element C of the 128-bit
   vector B.  */
#define vmull_laneq_u16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __b = (b);                                            \
       uint16x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("umull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12418 
/* Unsigned widening multiply (umull) of A by element C of the 128-bit
   vector B.  */
#define vmull_laneq_u32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __b = (b);                                            \
       uint32x2_t __a = (a);                                            \
       uint64x2_t __result;                                             \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12431 
12432 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_n_s16(int16x4_t a,int16_t b)12433 vmull_n_s16 (int16x4_t a, int16_t b)
12434 {
12435   int32x4_t result;
12436   __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
12437            : "=w"(result)
12438            : "w"(a), "w"(b)
12439            : /* No clobbers */);
12440   return result;
12441 }
12442 
12443 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_n_s32(int32x2_t a,int32_t b)12444 vmull_n_s32 (int32x2_t a, int32_t b)
12445 {
12446   int64x2_t result;
12447   __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
12448            : "=w"(result)
12449            : "w"(a), "w"(b)
12450            : /* No clobbers */);
12451   return result;
12452 }
12453 
12454 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_n_u16(uint16x4_t a,uint16_t b)12455 vmull_n_u16 (uint16x4_t a, uint16_t b)
12456 {
12457   uint32x4_t result;
12458   __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
12459            : "=w"(result)
12460            : "w"(a), "w"(b)
12461            : /* No clobbers */);
12462   return result;
12463 }
12464 
12465 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_n_u32(uint32x2_t a,uint32_t b)12466 vmull_n_u32 (uint32x2_t a, uint32_t b)
12467 {
12468   uint64x2_t result;
12469   __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
12470            : "=w"(result)
12471            : "w"(a), "w"(b)
12472            : /* No clobbers */);
12473   return result;
12474 }
12475 
12476 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmull_p8(poly8x8_t a,poly8x8_t b)12477 vmull_p8 (poly8x8_t a, poly8x8_t b)
12478 {
12479   poly16x8_t result;
12480   __asm__ ("pmull %0.8h, %1.8b, %2.8b"
12481            : "=w"(result)
12482            : "w"(a), "w"(b)
12483            : /* No clobbers */);
12484   return result;
12485 }
12486 
12487 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmull_s8(int8x8_t a,int8x8_t b)12488 vmull_s8 (int8x8_t a, int8x8_t b)
12489 {
12490   int16x8_t result;
12491   __asm__ ("smull %0.8h, %1.8b, %2.8b"
12492            : "=w"(result)
12493            : "w"(a), "w"(b)
12494            : /* No clobbers */);
12495   return result;
12496 }
12497 
12498 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_s16(int16x4_t a,int16x4_t b)12499 vmull_s16 (int16x4_t a, int16x4_t b)
12500 {
12501   int32x4_t result;
12502   __asm__ ("smull %0.4s, %1.4h, %2.4h"
12503            : "=w"(result)
12504            : "w"(a), "w"(b)
12505            : /* No clobbers */);
12506   return result;
12507 }
12508 
12509 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_s32(int32x2_t a,int32x2_t b)12510 vmull_s32 (int32x2_t a, int32x2_t b)
12511 {
12512   int64x2_t result;
12513   __asm__ ("smull %0.2d, %1.2s, %2.2s"
12514            : "=w"(result)
12515            : "w"(a), "w"(b)
12516            : /* No clobbers */);
12517   return result;
12518 }
12519 
12520 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmull_u8(uint8x8_t a,uint8x8_t b)12521 vmull_u8 (uint8x8_t a, uint8x8_t b)
12522 {
12523   uint16x8_t result;
12524   __asm__ ("umull %0.8h, %1.8b, %2.8b"
12525            : "=w"(result)
12526            : "w"(a), "w"(b)
12527            : /* No clobbers */);
12528   return result;
12529 }
12530 
12531 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_u16(uint16x4_t a,uint16x4_t b)12532 vmull_u16 (uint16x4_t a, uint16x4_t b)
12533 {
12534   uint32x4_t result;
12535   __asm__ ("umull %0.4s, %1.4h, %2.4h"
12536            : "=w"(result)
12537            : "w"(a), "w"(b)
12538            : /* No clobbers */);
12539   return result;
12540 }
12541 
12542 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_u32(uint32x2_t a,uint32x2_t b)12543 vmull_u32 (uint32x2_t a, uint32x2_t b)
12544 {
12545   uint64x2_t result;
12546   __asm__ ("umull %0.2d, %1.2s, %2.2s"
12547            : "=w"(result)
12548            : "w"(a), "w"(b)
12549            : /* No clobbers */);
12550   return result;
12551 }
12552 
/* Multiply each lane of A by element C of the 64-bit vector B (fmul by
   element).  */
#define vmulq_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t __b = (b);                                           \
       float32x4_t __a = (a);                                           \
       float32x4_t __result;                                            \
       __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]"                           \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12565 
/* Multiply each lane of A by element C of the single-element vector B
   (fmul by element).  */
#define vmulq_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t __b = (b);                                           \
       float64x2_t __a = (a);                                           \
       float64x2_t __result;                                            \
       __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]"                             \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12578 
/* Multiply each lane of A by element C of the 64-bit vector B (mul by
   element).  */
#define vmulq_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t __b = (b);                                             \
       int16x8_t __a = (a);                                             \
       int16x8_t __result;                                              \
       __asm__ ("mul %0.8h,%1.8h,%2.h[%3]"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12591 
/* Multiply each lane of A by element C of the 64-bit vector B (mul by
   element).  */
#define vmulq_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t __b = (b);                                             \
       int32x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("mul %0.4s,%1.4s,%2.s[%3]"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12604 
/* Multiply each lane of A by element C of the 64-bit vector B (mul by
   element).  */
#define vmulq_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t __b = (b);                                            \
       uint16x8_t __a = (a);                                            \
       uint16x8_t __result;                                             \
       __asm__ ("mul %0.8h,%1.8h,%2.h[%3]"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12617 
/* Multiply each lane of A by element C of the 64-bit vector B (mul by
   element).  */
#define vmulq_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t __b = (b);                                            \
       uint32x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("mul %0.4s, %1.4s, %2.s[%3]"                            \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12630 
/* Multiply each lane of A by element C of the 128-bit vector B (fmul by
   element).  */
#define vmulq_laneq_f32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __b = (b);                                           \
       float32x4_t __a = (a);                                           \
       float32x4_t __result;                                            \
       __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]"                           \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12643 
/* Multiply each lane of A by element C of the 128-bit vector B (fmul by
   element).  */
#define vmulq_laneq_f64(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t __b = (b);                                           \
       float64x2_t __a = (a);                                           \
       float64x2_t __result;                                            \
       __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]"                             \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12656 
/* Multiply each lane of A by element C of the 128-bit vector B (mul by
   element).  */
#define vmulq_laneq_s16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __b = (b);                                             \
       int16x8_t __a = (a);                                             \
       int16x8_t __result;                                              \
       __asm__ ("mul %0.8h, %1.8h, %2.h[%3]"                            \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12669 
/* Multiply each lane of A by element C of the 128-bit vector B (mul by
   element).  */
#define vmulq_laneq_s32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __b = (b);                                             \
       int32x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("mul %0.4s, %1.4s, %2.s[%3]"                            \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12682 
/* Multiply each lane of A by element C of the 128-bit vector B (mul by
   element).  */
#define vmulq_laneq_u16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __b = (b);                                            \
       uint16x8_t __a = (a);                                            \
       uint16x8_t __result;                                             \
       __asm__ ("mul %0.8h, %1.8h, %2.h[%3]"                            \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12695 
/* Multiply each lane of A by element C of the 128-bit vector B (mul by
   element).  */
#define vmulq_laneq_u32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __b = (b);                                            \
       uint32x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("mul %0.4s, %1.4s, %2.s[%3]"                            \
                : "=w"(__result)                                        \
                : "w"(__a), "w"(__b), "i"(c)                            \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12708 
12709 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_n_f32(float32x4_t a,float32_t b)12710 vmulq_n_f32 (float32x4_t a, float32_t b)
12711 {
12712   float32x4_t result;
12713   __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
12714            : "=w"(result)
12715            : "w"(a), "w"(b)
12716            : /* No clobbers */);
12717   return result;
12718 }
12719 
12720 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_n_f64(float64x2_t a,float64_t b)12721 vmulq_n_f64 (float64x2_t a, float64_t b)
12722 {
12723   float64x2_t result;
12724   __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
12725            : "=w"(result)
12726            : "w"(a), "w"(b)
12727            : /* No clobbers */);
12728   return result;
12729 }
12730 
12731 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_n_s16(int16x8_t a,int16_t b)12732 vmulq_n_s16 (int16x8_t a, int16_t b)
12733 {
12734   int16x8_t result;
12735   __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
12736            : "=w"(result)
12737            : "w"(a), "w"(b)
12738            : /* No clobbers */);
12739   return result;
12740 }
12741 
12742 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_n_s32(int32x4_t a,int32_t b)12743 vmulq_n_s32 (int32x4_t a, int32_t b)
12744 {
12745   int32x4_t result;
12746   __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
12747            : "=w"(result)
12748            : "w"(a), "w"(b)
12749            : /* No clobbers */);
12750   return result;
12751 }
12752 
12753 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_n_u16(uint16x8_t a,uint16_t b)12754 vmulq_n_u16 (uint16x8_t a, uint16_t b)
12755 {
12756   uint16x8_t result;
12757   __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
12758            : "=w"(result)
12759            : "w"(a), "w"(b)
12760            : /* No clobbers */);
12761   return result;
12762 }
12763 
12764 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_n_u32(uint32x4_t a,uint32_t b)12765 vmulq_n_u32 (uint32x4_t a, uint32_t b)
12766 {
12767   uint32x4_t result;
12768   __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
12769            : "=w"(result)
12770            : "w"(a), "w"(b)
12771            : /* No clobbers */);
12772   return result;
12773 }
12774 
/* vmuls_lane_f32: multiply the scalar A by element C of B (scalar fmul
   with a by-element operand).  Per the ACLE, the _lane form takes a
   64-bit lane vector, so B is float32x2_t; the previous float32x4_t
   operand rejected conforming callers.  */
#define vmuls_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32_t result;                                                \
       __asm__ ("fmul %s0,%s1,%2.s[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
12787 
12788 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmulx_f32(float32x2_t a,float32x2_t b)12789 vmulx_f32 (float32x2_t a, float32x2_t b)
12790 {
12791   float32x2_t result;
12792   __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
12793            : "=w"(result)
12794            : "w"(a), "w"(b)
12795            : /* No clobbers */);
12796   return result;
12797 }
12798 
/* Extended multiply (FMULX) of A by lane C of vector B.
   Temporaries are __-prefixed so the statement expression cannot
   capture caller identifiers (the previous `a_'/`b_'/`result' names
   could shadow arguments spelled the same way).
   NOTE(review): ACLE declares the vector operand of vmulx_lane_f32 as
   float32x2_t; this implementation takes float32x4_t (laneq-style) --
   confirm against the ACLE specification.  */
#define vmulx_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __b_ = (b);                                          \
       float32x2_t __a_ = (a);                                          \
       float32x2_t __result;                                            \
       __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]"                            \
                : "=w"(__result)                                        \
                : "w"(__a_), "w"(__b_), "i"(c)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12811 
12812 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmulxd_f64(float64_t a,float64_t b)12813 vmulxd_f64 (float64_t a, float64_t b)
12814 {
12815   float64_t result;
12816   __asm__ ("fmulx %d0, %d1, %d2"
12817            : "=w"(result)
12818            : "w"(a), "w"(b)
12819            : /* No clobbers */);
12820   return result;
12821 }
12822 
12823 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulxq_f32(float32x4_t a,float32x4_t b)12824 vmulxq_f32 (float32x4_t a, float32x4_t b)
12825 {
12826   float32x4_t result;
12827   __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
12828            : "=w"(result)
12829            : "w"(a), "w"(b)
12830            : /* No clobbers */);
12831   return result;
12832 }
12833 
12834 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulxq_f64(float64x2_t a,float64x2_t b)12835 vmulxq_f64 (float64x2_t a, float64x2_t b)
12836 {
12837   float64x2_t result;
12838   __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
12839            : "=w"(result)
12840            : "w"(a), "w"(b)
12841            : /* No clobbers */);
12842   return result;
12843 }
12844 
/* Extended multiply (FMULX) of each lane of A by lane C of vector B.
   Temporaries are __-prefixed so the statement expressions cannot
   capture caller identifiers (the previous `a_'/`b_'/`result' names
   could shadow arguments spelled the same way).
   NOTE(review): ACLE declares the lane-vector operand of
   vmulxq_lane_f32 as float32x2_t and of vmulxq_lane_f64 as
   float64x1_t; these implementations take the full-width (laneq-style)
   types -- confirm against the ACLE specification.  */
#define vmulxq_lane_f32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __b_ = (b);                                          \
       float32x4_t __a_ = (a);                                          \
       float32x4_t __result;                                            \
       __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]"                            \
                : "=w"(__result)                                        \
                : "w"(__a_), "w"(__b_), "i"(c)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmulxq_lane_f64(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t __b_ = (b);                                          \
       float64x2_t __a_ = (a);                                          \
       float64x2_t __result;                                            \
       __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]"                            \
                : "=w"(__result)                                        \
                : "w"(__a_), "w"(__b_), "i"(c)                          \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
12870 
12871 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmulxs_f32(float32_t a,float32_t b)12872 vmulxs_f32 (float32_t a, float32_t b)
12873 {
12874   float32_t result;
12875   __asm__ ("fmulx %s0, %s1, %s2"
12876            : "=w"(result)
12877            : "w"(a), "w"(b)
12878            : /* No clobbers */);
12879   return result;
12880 }
12881 
12882 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmvn_p8(poly8x8_t a)12883 vmvn_p8 (poly8x8_t a)
12884 {
12885   poly8x8_t result;
12886   __asm__ ("mvn %0.8b,%1.8b"
12887            : "=w"(result)
12888            : "w"(a)
12889            : /* No clobbers */);
12890   return result;
12891 }
12892 
12893 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmvn_s8(int8x8_t a)12894 vmvn_s8 (int8x8_t a)
12895 {
12896   int8x8_t result;
12897   __asm__ ("mvn %0.8b,%1.8b"
12898            : "=w"(result)
12899            : "w"(a)
12900            : /* No clobbers */);
12901   return result;
12902 }
12903 
12904 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmvn_s16(int16x4_t a)12905 vmvn_s16 (int16x4_t a)
12906 {
12907   int16x4_t result;
12908   __asm__ ("mvn %0.8b,%1.8b"
12909            : "=w"(result)
12910            : "w"(a)
12911            : /* No clobbers */);
12912   return result;
12913 }
12914 
12915 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmvn_s32(int32x2_t a)12916 vmvn_s32 (int32x2_t a)
12917 {
12918   int32x2_t result;
12919   __asm__ ("mvn %0.8b,%1.8b"
12920            : "=w"(result)
12921            : "w"(a)
12922            : /* No clobbers */);
12923   return result;
12924 }
12925 
12926 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmvn_u8(uint8x8_t a)12927 vmvn_u8 (uint8x8_t a)
12928 {
12929   uint8x8_t result;
12930   __asm__ ("mvn %0.8b,%1.8b"
12931            : "=w"(result)
12932            : "w"(a)
12933            : /* No clobbers */);
12934   return result;
12935 }
12936 
12937 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmvn_u16(uint16x4_t a)12938 vmvn_u16 (uint16x4_t a)
12939 {
12940   uint16x4_t result;
12941   __asm__ ("mvn %0.8b,%1.8b"
12942            : "=w"(result)
12943            : "w"(a)
12944            : /* No clobbers */);
12945   return result;
12946 }
12947 
12948 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmvn_u32(uint32x2_t a)12949 vmvn_u32 (uint32x2_t a)
12950 {
12951   uint32x2_t result;
12952   __asm__ ("mvn %0.8b,%1.8b"
12953            : "=w"(result)
12954            : "w"(a)
12955            : /* No clobbers */);
12956   return result;
12957 }
12958 
12959 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmvnq_p8(poly8x16_t a)12960 vmvnq_p8 (poly8x16_t a)
12961 {
12962   poly8x16_t result;
12963   __asm__ ("mvn %0.16b,%1.16b"
12964            : "=w"(result)
12965            : "w"(a)
12966            : /* No clobbers */);
12967   return result;
12968 }
12969 
12970 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmvnq_s8(int8x16_t a)12971 vmvnq_s8 (int8x16_t a)
12972 {
12973   int8x16_t result;
12974   __asm__ ("mvn %0.16b,%1.16b"
12975            : "=w"(result)
12976            : "w"(a)
12977            : /* No clobbers */);
12978   return result;
12979 }
12980 
12981 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmvnq_s16(int16x8_t a)12982 vmvnq_s16 (int16x8_t a)
12983 {
12984   int16x8_t result;
12985   __asm__ ("mvn %0.16b,%1.16b"
12986            : "=w"(result)
12987            : "w"(a)
12988            : /* No clobbers */);
12989   return result;
12990 }
12991 
12992 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmvnq_s32(int32x4_t a)12993 vmvnq_s32 (int32x4_t a)
12994 {
12995   int32x4_t result;
12996   __asm__ ("mvn %0.16b,%1.16b"
12997            : "=w"(result)
12998            : "w"(a)
12999            : /* No clobbers */);
13000   return result;
13001 }
13002 
13003 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmvnq_u8(uint8x16_t a)13004 vmvnq_u8 (uint8x16_t a)
13005 {
13006   uint8x16_t result;
13007   __asm__ ("mvn %0.16b,%1.16b"
13008            : "=w"(result)
13009            : "w"(a)
13010            : /* No clobbers */);
13011   return result;
13012 }
13013 
13014 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmvnq_u16(uint16x8_t a)13015 vmvnq_u16 (uint16x8_t a)
13016 {
13017   uint16x8_t result;
13018   __asm__ ("mvn %0.16b,%1.16b"
13019            : "=w"(result)
13020            : "w"(a)
13021            : /* No clobbers */);
13022   return result;
13023 }
13024 
13025 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmvnq_u32(uint32x4_t a)13026 vmvnq_u32 (uint32x4_t a)
13027 {
13028   uint32x4_t result;
13029   __asm__ ("mvn %0.16b,%1.16b"
13030            : "=w"(result)
13031            : "w"(a)
13032            : /* No clobbers */);
13033   return result;
13034 }
13035 
13036 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vneg_f32(float32x2_t a)13037 vneg_f32 (float32x2_t a)
13038 {
13039   float32x2_t result;
13040   __asm__ ("fneg %0.2s,%1.2s"
13041            : "=w"(result)
13042            : "w"(a)
13043            : /* No clobbers */);
13044   return result;
13045 }
13046 
13047 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vneg_s8(int8x8_t a)13048 vneg_s8 (int8x8_t a)
13049 {
13050   int8x8_t result;
13051   __asm__ ("neg %0.8b,%1.8b"
13052            : "=w"(result)
13053            : "w"(a)
13054            : /* No clobbers */);
13055   return result;
13056 }
13057 
13058 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vneg_s16(int16x4_t a)13059 vneg_s16 (int16x4_t a)
13060 {
13061   int16x4_t result;
13062   __asm__ ("neg %0.4h,%1.4h"
13063            : "=w"(result)
13064            : "w"(a)
13065            : /* No clobbers */);
13066   return result;
13067 }
13068 
13069 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vneg_s32(int32x2_t a)13070 vneg_s32 (int32x2_t a)
13071 {
13072   int32x2_t result;
13073   __asm__ ("neg %0.2s,%1.2s"
13074            : "=w"(result)
13075            : "w"(a)
13076            : /* No clobbers */);
13077   return result;
13078 }
13079 
13080 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vnegq_f32(float32x4_t a)13081 vnegq_f32 (float32x4_t a)
13082 {
13083   float32x4_t result;
13084   __asm__ ("fneg %0.4s,%1.4s"
13085            : "=w"(result)
13086            : "w"(a)
13087            : /* No clobbers */);
13088   return result;
13089 }
13090 
13091 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vnegq_f64(float64x2_t a)13092 vnegq_f64 (float64x2_t a)
13093 {
13094   float64x2_t result;
13095   __asm__ ("fneg %0.2d,%1.2d"
13096            : "=w"(result)
13097            : "w"(a)
13098            : /* No clobbers */);
13099   return result;
13100 }
13101 
13102 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vnegq_s8(int8x16_t a)13103 vnegq_s8 (int8x16_t a)
13104 {
13105   int8x16_t result;
13106   __asm__ ("neg %0.16b,%1.16b"
13107            : "=w"(result)
13108            : "w"(a)
13109            : /* No clobbers */);
13110   return result;
13111 }
13112 
13113 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vnegq_s16(int16x8_t a)13114 vnegq_s16 (int16x8_t a)
13115 {
13116   int16x8_t result;
13117   __asm__ ("neg %0.8h,%1.8h"
13118            : "=w"(result)
13119            : "w"(a)
13120            : /* No clobbers */);
13121   return result;
13122 }
13123 
13124 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vnegq_s32(int32x4_t a)13125 vnegq_s32 (int32x4_t a)
13126 {
13127   int32x4_t result;
13128   __asm__ ("neg %0.4s,%1.4s"
13129            : "=w"(result)
13130            : "w"(a)
13131            : /* No clobbers */);
13132   return result;
13133 }
13134 
13135 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vnegq_s64(int64x2_t a)13136 vnegq_s64 (int64x2_t a)
13137 {
13138   int64x2_t result;
13139   __asm__ ("neg %0.2d,%1.2d"
13140            : "=w"(result)
13141            : "w"(a)
13142            : /* No clobbers */);
13143   return result;
13144 }
13145 
13146 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadal_s8(int16x4_t a,int8x8_t b)13147 vpadal_s8 (int16x4_t a, int8x8_t b)
13148 {
13149   int16x4_t result;
13150   __asm__ ("sadalp %0.4h,%2.8b"
13151            : "=w"(result)
13152            : "0"(a), "w"(b)
13153            : /* No clobbers */);
13154   return result;
13155 }
13156 
13157 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadal_s16(int32x2_t a,int16x4_t b)13158 vpadal_s16 (int32x2_t a, int16x4_t b)
13159 {
13160   int32x2_t result;
13161   __asm__ ("sadalp %0.2s,%2.4h"
13162            : "=w"(result)
13163            : "0"(a), "w"(b)
13164            : /* No clobbers */);
13165   return result;
13166 }
13167 
13168 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpadal_s32(int64x1_t a,int32x2_t b)13169 vpadal_s32 (int64x1_t a, int32x2_t b)
13170 {
13171   int64x1_t result;
13172   __asm__ ("sadalp %0.1d,%2.2s"
13173            : "=w"(result)
13174            : "0"(a), "w"(b)
13175            : /* No clobbers */);
13176   return result;
13177 }
13178 
13179 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadal_u8(uint16x4_t a,uint8x8_t b)13180 vpadal_u8 (uint16x4_t a, uint8x8_t b)
13181 {
13182   uint16x4_t result;
13183   __asm__ ("uadalp %0.4h,%2.8b"
13184            : "=w"(result)
13185            : "0"(a), "w"(b)
13186            : /* No clobbers */);
13187   return result;
13188 }
13189 
13190 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadal_u16(uint32x2_t a,uint16x4_t b)13191 vpadal_u16 (uint32x2_t a, uint16x4_t b)
13192 {
13193   uint32x2_t result;
13194   __asm__ ("uadalp %0.2s,%2.4h"
13195            : "=w"(result)
13196            : "0"(a), "w"(b)
13197            : /* No clobbers */);
13198   return result;
13199 }
13200 
13201 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpadal_u32(uint64x1_t a,uint32x2_t b)13202 vpadal_u32 (uint64x1_t a, uint32x2_t b)
13203 {
13204   uint64x1_t result;
13205   __asm__ ("uadalp %0.1d,%2.2s"
13206            : "=w"(result)
13207            : "0"(a), "w"(b)
13208            : /* No clobbers */);
13209   return result;
13210 }
13211 
13212 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpadalq_s8(int16x8_t a,int8x16_t b)13213 vpadalq_s8 (int16x8_t a, int8x16_t b)
13214 {
13215   int16x8_t result;
13216   __asm__ ("sadalp %0.8h,%2.16b"
13217            : "=w"(result)
13218            : "0"(a), "w"(b)
13219            : /* No clobbers */);
13220   return result;
13221 }
13222 
13223 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpadalq_s16(int32x4_t a,int16x8_t b)13224 vpadalq_s16 (int32x4_t a, int16x8_t b)
13225 {
13226   int32x4_t result;
13227   __asm__ ("sadalp %0.4s,%2.8h"
13228            : "=w"(result)
13229            : "0"(a), "w"(b)
13230            : /* No clobbers */);
13231   return result;
13232 }
13233 
13234 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpadalq_s32(int64x2_t a,int32x4_t b)13235 vpadalq_s32 (int64x2_t a, int32x4_t b)
13236 {
13237   int64x2_t result;
13238   __asm__ ("sadalp %0.2d,%2.4s"
13239            : "=w"(result)
13240            : "0"(a), "w"(b)
13241            : /* No clobbers */);
13242   return result;
13243 }
13244 
13245 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpadalq_u8(uint16x8_t a,uint8x16_t b)13246 vpadalq_u8 (uint16x8_t a, uint8x16_t b)
13247 {
13248   uint16x8_t result;
13249   __asm__ ("uadalp %0.8h,%2.16b"
13250            : "=w"(result)
13251            : "0"(a), "w"(b)
13252            : /* No clobbers */);
13253   return result;
13254 }
13255 
13256 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpadalq_u16(uint32x4_t a,uint16x8_t b)13257 vpadalq_u16 (uint32x4_t a, uint16x8_t b)
13258 {
13259   uint32x4_t result;
13260   __asm__ ("uadalp %0.4s,%2.8h"
13261            : "=w"(result)
13262            : "0"(a), "w"(b)
13263            : /* No clobbers */);
13264   return result;
13265 }
13266 
13267 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpadalq_u32(uint64x2_t a,uint32x4_t b)13268 vpadalq_u32 (uint64x2_t a, uint32x4_t b)
13269 {
13270   uint64x2_t result;
13271   __asm__ ("uadalp %0.2d,%2.4s"
13272            : "=w"(result)
13273            : "0"(a), "w"(b)
13274            : /* No clobbers */);
13275   return result;
13276 }
13277 
13278 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpadd_f32(float32x2_t a,float32x2_t b)13279 vpadd_f32 (float32x2_t a, float32x2_t b)
13280 {
13281   float32x2_t result;
13282   __asm__ ("faddp %0.2s,%1.2s,%2.2s"
13283            : "=w"(result)
13284            : "w"(a), "w"(b)
13285            : /* No clobbers */);
13286   return result;
13287 }
13288 
13289 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpadd_s8(int8x8_t __a,int8x8_t __b)13290 vpadd_s8 (int8x8_t __a, int8x8_t __b)
13291 {
13292   return __builtin_aarch64_addpv8qi (__a, __b);
13293 }
13294 
13295 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadd_s16(int16x4_t __a,int16x4_t __b)13296 vpadd_s16 (int16x4_t __a, int16x4_t __b)
13297 {
13298   return __builtin_aarch64_addpv4hi (__a, __b);
13299 }
13300 
13301 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadd_s32(int32x2_t __a,int32x2_t __b)13302 vpadd_s32 (int32x2_t __a, int32x2_t __b)
13303 {
13304   return __builtin_aarch64_addpv2si (__a, __b);
13305 }
13306 
13307 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpadd_u8(uint8x8_t __a,uint8x8_t __b)13308 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
13309 {
13310   return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
13311 						 (int8x8_t) __b);
13312 }
13313 
13314 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadd_u16(uint16x4_t __a,uint16x4_t __b)13315 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
13316 {
13317   return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
13318 						  (int16x4_t) __b);
13319 }
13320 
13321 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadd_u32(uint32x2_t __a,uint32x2_t __b)13322 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
13323 {
13324   return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
13325 						  (int32x2_t) __b);
13326 }
13327 
13328 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpaddd_f64(float64x2_t a)13329 vpaddd_f64 (float64x2_t a)
13330 {
13331   float64_t result;
13332   __asm__ ("faddp %d0,%1.2d"
13333            : "=w"(result)
13334            : "w"(a)
13335            : /* No clobbers */);
13336   return result;
13337 }
13338 
13339 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpaddl_s8(int8x8_t a)13340 vpaddl_s8 (int8x8_t a)
13341 {
13342   int16x4_t result;
13343   __asm__ ("saddlp %0.4h,%1.8b"
13344            : "=w"(result)
13345            : "w"(a)
13346            : /* No clobbers */);
13347   return result;
13348 }
13349 
13350 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpaddl_s16(int16x4_t a)13351 vpaddl_s16 (int16x4_t a)
13352 {
13353   int32x2_t result;
13354   __asm__ ("saddlp %0.2s,%1.4h"
13355            : "=w"(result)
13356            : "w"(a)
13357            : /* No clobbers */);
13358   return result;
13359 }
13360 
13361 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpaddl_s32(int32x2_t a)13362 vpaddl_s32 (int32x2_t a)
13363 {
13364   int64x1_t result;
13365   __asm__ ("saddlp %0.1d,%1.2s"
13366            : "=w"(result)
13367            : "w"(a)
13368            : /* No clobbers */);
13369   return result;
13370 }
13371 
13372 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpaddl_u8(uint8x8_t a)13373 vpaddl_u8 (uint8x8_t a)
13374 {
13375   uint16x4_t result;
13376   __asm__ ("uaddlp %0.4h,%1.8b"
13377            : "=w"(result)
13378            : "w"(a)
13379            : /* No clobbers */);
13380   return result;
13381 }
13382 
13383 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpaddl_u16(uint16x4_t a)13384 vpaddl_u16 (uint16x4_t a)
13385 {
13386   uint32x2_t result;
13387   __asm__ ("uaddlp %0.2s,%1.4h"
13388            : "=w"(result)
13389            : "w"(a)
13390            : /* No clobbers */);
13391   return result;
13392 }
13393 
13394 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpaddl_u32(uint32x2_t a)13395 vpaddl_u32 (uint32x2_t a)
13396 {
13397   uint64x1_t result;
13398   __asm__ ("uaddlp %0.1d,%1.2s"
13399            : "=w"(result)
13400            : "w"(a)
13401            : /* No clobbers */);
13402   return result;
13403 }
13404 
13405 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddlq_s8(int8x16_t a)13406 vpaddlq_s8 (int8x16_t a)
13407 {
13408   int16x8_t result;
13409   __asm__ ("saddlp %0.8h,%1.16b"
13410            : "=w"(result)
13411            : "w"(a)
13412            : /* No clobbers */);
13413   return result;
13414 }
13415 
13416 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddlq_s16(int16x8_t a)13417 vpaddlq_s16 (int16x8_t a)
13418 {
13419   int32x4_t result;
13420   __asm__ ("saddlp %0.4s,%1.8h"
13421            : "=w"(result)
13422            : "w"(a)
13423            : /* No clobbers */);
13424   return result;
13425 }
13426 
13427 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddlq_s32(int32x4_t a)13428 vpaddlq_s32 (int32x4_t a)
13429 {
13430   int64x2_t result;
13431   __asm__ ("saddlp %0.2d,%1.4s"
13432            : "=w"(result)
13433            : "w"(a)
13434            : /* No clobbers */);
13435   return result;
13436 }
13437 
13438 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddlq_u8(uint8x16_t a)13439 vpaddlq_u8 (uint8x16_t a)
13440 {
13441   uint16x8_t result;
13442   __asm__ ("uaddlp %0.8h,%1.16b"
13443            : "=w"(result)
13444            : "w"(a)
13445            : /* No clobbers */);
13446   return result;
13447 }
13448 
13449 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddlq_u16(uint16x8_t a)13450 vpaddlq_u16 (uint16x8_t a)
13451 {
13452   uint32x4_t result;
13453   __asm__ ("uaddlp %0.4s,%1.8h"
13454            : "=w"(result)
13455            : "w"(a)
13456            : /* No clobbers */);
13457   return result;
13458 }
13459 
13460 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddlq_u32(uint32x4_t a)13461 vpaddlq_u32 (uint32x4_t a)
13462 {
13463   uint64x2_t result;
13464   __asm__ ("uaddlp %0.2d,%1.4s"
13465            : "=w"(result)
13466            : "w"(a)
13467            : /* No clobbers */);
13468   return result;
13469 }
13470 
13471 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpaddq_f32(float32x4_t a,float32x4_t b)13472 vpaddq_f32 (float32x4_t a, float32x4_t b)
13473 {
13474   float32x4_t result;
13475   __asm__ ("faddp %0.4s,%1.4s,%2.4s"
13476            : "=w"(result)
13477            : "w"(a), "w"(b)
13478            : /* No clobbers */);
13479   return result;
13480 }
13481 
13482 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpaddq_f64(float64x2_t a,float64x2_t b)13483 vpaddq_f64 (float64x2_t a, float64x2_t b)
13484 {
13485   float64x2_t result;
13486   __asm__ ("faddp %0.2d,%1.2d,%2.2d"
13487            : "=w"(result)
13488            : "w"(a), "w"(b)
13489            : /* No clobbers */);
13490   return result;
13491 }
13492 
13493 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpaddq_s8(int8x16_t a,int8x16_t b)13494 vpaddq_s8 (int8x16_t a, int8x16_t b)
13495 {
13496   int8x16_t result;
13497   __asm__ ("addp %0.16b,%1.16b,%2.16b"
13498            : "=w"(result)
13499            : "w"(a), "w"(b)
13500            : /* No clobbers */);
13501   return result;
13502 }
13503 
13504 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddq_s16(int16x8_t a,int16x8_t b)13505 vpaddq_s16 (int16x8_t a, int16x8_t b)
13506 {
13507   int16x8_t result;
13508   __asm__ ("addp %0.8h,%1.8h,%2.8h"
13509            : "=w"(result)
13510            : "w"(a), "w"(b)
13511            : /* No clobbers */);
13512   return result;
13513 }
13514 
13515 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddq_s32(int32x4_t a,int32x4_t b)13516 vpaddq_s32 (int32x4_t a, int32x4_t b)
13517 {
13518   int32x4_t result;
13519   __asm__ ("addp %0.4s,%1.4s,%2.4s"
13520            : "=w"(result)
13521            : "w"(a), "w"(b)
13522            : /* No clobbers */);
13523   return result;
13524 }
13525 
13526 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddq_s64(int64x2_t a,int64x2_t b)13527 vpaddq_s64 (int64x2_t a, int64x2_t b)
13528 {
13529   int64x2_t result;
13530   __asm__ ("addp %0.2d,%1.2d,%2.2d"
13531            : "=w"(result)
13532            : "w"(a), "w"(b)
13533            : /* No clobbers */);
13534   return result;
13535 }
13536 
13537 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpaddq_u8(uint8x16_t a,uint8x16_t b)13538 vpaddq_u8 (uint8x16_t a, uint8x16_t b)
13539 {
13540   uint8x16_t result;
13541   __asm__ ("addp %0.16b,%1.16b,%2.16b"
13542            : "=w"(result)
13543            : "w"(a), "w"(b)
13544            : /* No clobbers */);
13545   return result;
13546 }
13547 
13548 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddq_u16(uint16x8_t a,uint16x8_t b)13549 vpaddq_u16 (uint16x8_t a, uint16x8_t b)
13550 {
13551   uint16x8_t result;
13552   __asm__ ("addp %0.8h,%1.8h,%2.8h"
13553            : "=w"(result)
13554            : "w"(a), "w"(b)
13555            : /* No clobbers */);
13556   return result;
13557 }
13558 
13559 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddq_u32(uint32x4_t a,uint32x4_t b)13560 vpaddq_u32 (uint32x4_t a, uint32x4_t b)
13561 {
13562   uint32x4_t result;
13563   __asm__ ("addp %0.4s,%1.4s,%2.4s"
13564            : "=w"(result)
13565            : "w"(a), "w"(b)
13566            : /* No clobbers */);
13567   return result;
13568 }
13569 
13570 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddq_u64(uint64x2_t a,uint64x2_t b)13571 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
13572 {
13573   uint64x2_t result;
13574   __asm__ ("addp %0.2d,%1.2d,%2.2d"
13575            : "=w"(result)
13576            : "w"(a), "w"(b)
13577            : /* No clobbers */);
13578   return result;
13579 }
13580 
13581 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpadds_f32(float32x2_t a)13582 vpadds_f32 (float32x2_t a)
13583 {
13584   float32_t result;
13585   __asm__ ("faddp %s0,%1.2s"
13586            : "=w"(result)
13587            : "w"(a)
13588            : /* No clobbers */);
13589   return result;
13590 }
13591 
13592 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmax_f32(float32x2_t a,float32x2_t b)13593 vpmax_f32 (float32x2_t a, float32x2_t b)
13594 {
13595   float32x2_t result;
13596   __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
13597            : "=w"(result)
13598            : "w"(a), "w"(b)
13599            : /* No clobbers */);
13600   return result;
13601 }
13602 
13603 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmax_s8(int8x8_t a,int8x8_t b)13604 vpmax_s8 (int8x8_t a, int8x8_t b)
13605 {
13606   int8x8_t result;
13607   __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
13608            : "=w"(result)
13609            : "w"(a), "w"(b)
13610            : /* No clobbers */);
13611   return result;
13612 }
13613 
13614 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmax_s16(int16x4_t a,int16x4_t b)13615 vpmax_s16 (int16x4_t a, int16x4_t b)
13616 {
13617   int16x4_t result;
13618   __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
13619            : "=w"(result)
13620            : "w"(a), "w"(b)
13621            : /* No clobbers */);
13622   return result;
13623 }
13624 
13625 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmax_s32(int32x2_t a,int32x2_t b)13626 vpmax_s32 (int32x2_t a, int32x2_t b)
13627 {
13628   int32x2_t result;
13629   __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
13630            : "=w"(result)
13631            : "w"(a), "w"(b)
13632            : /* No clobbers */);
13633   return result;
13634 }
13635 
13636 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmax_u8(uint8x8_t a,uint8x8_t b)13637 vpmax_u8 (uint8x8_t a, uint8x8_t b)
13638 {
13639   uint8x8_t result;
13640   __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
13641            : "=w"(result)
13642            : "w"(a), "w"(b)
13643            : /* No clobbers */);
13644   return result;
13645 }
13646 
13647 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmax_u16(uint16x4_t a,uint16x4_t b)13648 vpmax_u16 (uint16x4_t a, uint16x4_t b)
13649 {
13650   uint16x4_t result;
13651   __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
13652            : "=w"(result)
13653            : "w"(a), "w"(b)
13654            : /* No clobbers */);
13655   return result;
13656 }
13657 
13658 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmax_u32(uint32x2_t a,uint32x2_t b)13659 vpmax_u32 (uint32x2_t a, uint32x2_t b)
13660 {
13661   uint32x2_t result;
13662   __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
13663            : "=w"(result)
13664            : "w"(a), "w"(b)
13665            : /* No clobbers */);
13666   return result;
13667 }
13668 
/* vpmaxnm_f32: pairwise floating-point maxNum (FMAXNMP) of adjacent lanes
   of A and B (NaN-handling per the instruction's maxNum semantics).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmaxnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmaxnmq_f32: quad-width pairwise FMAXNMP on 4x32-bit float lanes.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmaxnmq_f64: quad-width pairwise FMAXNMP on 2x64-bit float lanes.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
13701 
/* vpmaxnmqd_f64: scalar FMAXNMP reduction of the two 64-bit lanes of A.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxnmqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fmaxnmp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vpmaxnms_f32: scalar FMAXNMP reduction of the two 32-bit lanes of A.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxnms_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fmaxnmp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
13723 
/* vpmaxq_f32: quad-width pairwise floating-point maximum (FMAXP).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmaxq_f64: quad-width pairwise floating-point maximum (FMAXP).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmaxq_s8: quad-width pairwise signed maximum (SMAXP), 16x8-bit lanes.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpmaxq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmaxq_s16: quad-width pairwise signed maximum (SMAXP), 8x16-bit lanes.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpmaxq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmaxq_s32: quad-width pairwise signed maximum (SMAXP), 4x32-bit lanes.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpmaxq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmaxq_u8: quad-width pairwise unsigned maximum (UMAXP), 16x8-bit lanes.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmaxq_u16: quad-width pairwise unsigned maximum (UMAXP), 8x16-bit lanes.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmaxq_u32: quad-width pairwise unsigned maximum (UMAXP), 4x32-bit lanes.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
13811 
/* vpmaxqd_f64: scalar FMAXP reduction of the two 64-bit lanes of A.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fmaxp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vpmaxs_f32: scalar FMAXP reduction of the two 32-bit lanes of A.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxs_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fmaxp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
13833 
/* vpmin_f32: pairwise floating-point minimum (FMINP) of A and B.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmin_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmin_s8: pairwise signed minimum (SMINP), 8x8-bit lanes.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmin_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("sminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmin_s16: pairwise signed minimum (SMINP), 4x16-bit lanes.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmin_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("sminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmin_s32: pairwise signed minimum (SMINP), 2x32-bit lanes.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmin_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("sminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmin_u8: pairwise unsigned minimum (UMINP), 8x8-bit lanes.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmin_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmin_u16: pairwise unsigned minimum (UMINP), 4x16-bit lanes.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmin_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpmin_u32: pairwise unsigned minimum (UMINP), 2x32-bit lanes.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmin_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
13910 
/* vpminnm_f32: pairwise floating-point minNum (FMINNMP) of adjacent lanes
   of A and B (NaN-handling per the instruction's minNum semantics).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpminnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpminnmq_f32: quad-width pairwise FMINNMP on 4x32-bit float lanes.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpminnmq_f64: quad-width pairwise FMINNMP on 2x64-bit float lanes.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
13943 
/* vpminnmqd_f64: scalar FMINNMP reduction of the two 64-bit lanes of A.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminnmqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminnmp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vpminnms_f32: scalar FMINNMP reduction of the two 32-bit lanes of A.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpminnms_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminnmp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
13965 
/* vpminq_f32: quad-width pairwise floating-point minimum (FMINP).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpminq_f64: quad-width pairwise floating-point minimum (FMINP).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fminp %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpminq_s8: quad-width pairwise signed minimum (SMINP), 16x8-bit lanes.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpminq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("sminp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpminq_s16: quad-width pairwise signed minimum (SMINP), 8x16-bit lanes.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpminq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("sminp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpminq_s32: quad-width pairwise signed minimum (SMINP), 4x32-bit lanes.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpminq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("sminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpminq_u8: quad-width pairwise unsigned minimum (UMINP), 16x8-bit lanes.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpminq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uminp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpminq_u16: quad-width pairwise unsigned minimum (UMINP), 8x16-bit lanes.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpminq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uminp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* vpminq_u32: quad-width pairwise unsigned minimum (UMINP), 4x32-bit lanes.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpminq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
14053 
/* vpminqd_f64: scalar FMINP reduction of the two 64-bit lanes of A.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* vpmins_f32: scalar FMINP reduction of the two 32-bit lanes of A.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmins_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14075 
14076 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_n_s16(int16x4_t a,int16_t b)14077 vqdmulh_n_s16 (int16x4_t a, int16_t b)
14078 {
14079   int16x4_t result;
14080   __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
14081            : "=w"(result)
14082            : "w"(a), "w"(b)
14083            : /* No clobbers */);
14084   return result;
14085 }
14086 
/* vqdmulh_n_s32: signed saturating doubling multiply returning high half,
   each lane of A times the scalar B (SQDMULH by element, s[0]).
   The 32-bit indexed form encodes the full V0-V31 range, so "w" is the
   correct constraint here (unlike the 16-bit h[0] variants).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
14097 
14098 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_n_s16(int16x8_t a,int16_t b)14099 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
14100 {
14101   int16x8_t result;
14102   __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
14103            : "=w"(result)
14104            : "w"(a), "w"(b)
14105            : /* No clobbers */);
14106   return result;
14107 }
14108 
/* vqdmulhq_n_s32: quad-width SQDMULH of each lane of A by scalar B
   (by element, s[0]).  32-bit indexed form encodes V0-V31, so "w" is
   correct for B.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
14119 
/* vqmovn_high_s16: saturating narrow of B into the upper half of the
   result (SQXTN2); the lower half is seeded with A via vcombine.  The
   "+w" read/write constraint is required because SQXTN2 only writes the
   top half of the destination register.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* vqmovn_high_s32: SQXTN2 narrowing B into the upper half; lower half
   comes from A.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* vqmovn_high_s64: SQXTN2 narrowing B into the upper half; lower half
   comes from A.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* vqmovn_high_u16: unsigned saturating narrow (UQXTN2) of B into the
   upper half; lower half comes from A.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* vqmovn_high_u32: UQXTN2 narrowing B into the upper half; lower half
   comes from A.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* vqmovn_high_u64: UQXTN2 narrowing B into the upper half; lower half
   comes from A.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
14185 
/* vqmovun_high_s16: signed-to-unsigned saturating narrow (SQXTUN2) of B
   into the upper half of the result; the lower half is seeded with A.
   "+w" is needed because SQXTUN2 writes only the top half.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* vqmovun_high_s32: SQXTUN2 narrowing B into the upper half; lower half
   comes from A.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* vqmovun_high_s64: SQXTUN2 narrowing B into the upper half; lower half
   comes from A.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
14218 
14219 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_n_s16(int16x4_t a,int16_t b)14220 vqrdmulh_n_s16 (int16x4_t a, int16_t b)
14221 {
14222   int16x4_t result;
14223   __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
14224            : "=w"(result)
14225            : "w"(a), "w"(b)
14226            : /* No clobbers */);
14227   return result;
14228 }
14229 
/* vqrdmulh_n_s32: signed saturating rounding doubling multiply returning
   high half (SQRDMULH by element, s[0]).  The 32-bit indexed form
   encodes V0-V31, so "w" is correct for B.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
14240 
14241 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s16(int16x8_t a,int16_t b)14242 vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
14243 {
14244   int16x8_t result;
14245   __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
14246            : "=w"(result)
14247            : "w"(a), "w"(b)
14248            : /* No clobbers */);
14249   return result;
14250 }
14251 
/* vqrdmulhq_n_s32: quad-width SQRDMULH of each lane of A by scalar B
   (by element, s[0]).  32-bit indexed form encodes V0-V31, so "w" is
   correct for B.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
14262 
/* vqrshrn_high_n_s16: saturating rounding shift-right-narrow B by C into
   the upper half of the result (SQRSHRN2); lower half is seeded with A.
   Implemented as a macro because C must be an immediate ("i").  */
#define vqrshrn_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8 (UINT64_C (0x0)));          \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrn_high_n_s32: SQRSHRN2 of B by immediate C into the upper half;
   lower half from A.  */
#define vqrshrn_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16 (UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrn_high_n_s64: SQRSHRN2 of B by immediate C into the upper half;
   lower half from A.  */
#define vqrshrn_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32 (UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrn_high_n_u16: unsigned variant (UQRSHRN2); upper half from B
   shifted by immediate C, lower half from A.  */
#define vqrshrn_high_n_u16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));         \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrn_high_n_u32: UQRSHRN2 of B by immediate C into the upper half;
   lower half from A.  */
#define vqrshrn_high_n_u32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrn_high_n_u64: UQRSHRN2 of B by immediate C into the upper half;
   lower half from A.  */
#define vqrshrn_high_n_u64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
14346 
/* vqrshrun_high_n_s16: signed-to-unsigned saturating rounding
   shift-right-narrow (SQRSHRUN2) of B by immediate C into the upper
   half of the result; lower half is seeded with A.  Macro form because
   C must be an immediate ("i").  */
#define vqrshrun_high_n_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));         \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrun_high_n_s32: SQRSHRUN2 of B by immediate C into the upper
   half; lower half from A.  */
#define vqrshrun_high_n_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrun_high_n_s64: SQRSHRUN2 of B by immediate C into the upper
   half; lower half from A.  */
#define vqrshrun_high_n_s64(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
14388 
14389 #define vqshrn_high_n_s16(a, b, c)                                      \
14390   __extension__                                                         \
14391     ({                                                                  \
14392        int16x8_t b_ = (b);                                              \
14393        int8x8_t a_ = (a);                                               \
14394        int8x16_t result = vcombine_s8                                   \
14395                             (a_, vcreate_s8 (UINT64_C (0x0)));          \
14396        __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
14397                 : "+w"(result)                                          \
14398                 : "w"(b_), "i"(c)                                       \
14399                 : /* No clobbers */);                                   \
14400        result;                                                          \
14401      })
14402 
14403 #define vqshrn_high_n_s32(a, b, c)                                      \
14404   __extension__                                                         \
14405     ({                                                                  \
14406        int32x4_t b_ = (b);                                              \
14407        int16x4_t a_ = (a);                                              \
14408        int16x8_t result = vcombine_s16                                  \
14409                             (a_, vcreate_s16 (UINT64_C (0x0)));         \
14410        __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
14411                 : "+w"(result)                                          \
14412                 : "w"(b_), "i"(c)                                       \
14413                 : /* No clobbers */);                                   \
14414        result;                                                          \
14415      })
14416 
14417 #define vqshrn_high_n_s64(a, b, c)                                      \
14418   __extension__                                                         \
14419     ({                                                                  \
14420        int64x2_t b_ = (b);                                              \
14421        int32x2_t a_ = (a);                                              \
14422        int32x4_t result = vcombine_s32                                  \
14423                             (a_, vcreate_s32 (UINT64_C (0x0)));         \
14424        __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
14425                 : "+w"(result)                                          \
14426                 : "w"(b_), "i"(c)                                       \
14427                 : /* No clobbers */);                                   \
14428        result;                                                          \
14429      })
14430 
/* Unsigned saturating shift-right-narrow of B by immediate C (UQSHRN2)
   into the high half of the result; low half comes from A.  */
#define vqshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));         \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
14444 
/* Unsigned saturating shift-right-narrow of B by immediate C (UQSHRN2)
   into the high half of the result; low half comes from A.  */
#define vqshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
14458 
/* Unsigned saturating shift-right-narrow of B by immediate C (UQSHRN2)
   into the high half of the result; low half comes from A.  */
#define vqshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
14472 
/* Signed-to-unsigned saturating shift-right-narrow of B by immediate C
   (SQSHRUN2) into the high half of the result; low half comes from A.  */
#define vqshrun_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));         \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
14486 
/* Signed-to-unsigned saturating shift-right-narrow of B by immediate C
   (SQSHRUN2) into the high half of the result; low half comes from A.  */
#define vqshrun_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
14500 
/* Signed-to-unsigned saturating shift-right-narrow of B by immediate C
   (SQSHRUN2) into the high half of the result; low half comes from A.  */
#define vqshrun_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
14514 
/* Reverse the bit order within each byte of A (RBIT).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrbit_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("rbit %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14525 
/* Reverse the bit order within each byte of A (RBIT).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrbit_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("rbit %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14536 
/* Reverse the bit order within each byte of A, 128-bit form (RBIT).  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrbitq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("rbit %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14547 
/* Reverse the bit order within each byte of A, 128-bit form (RBIT).  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrbitq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("rbit %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14558 
/* Per-lane floating-point reciprocal estimate of A (FRECPE).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecpe_f32 (float32x2_t a)
{
  float32x2_t result;
  __asm__ ("frecpe %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14569 
/* Per-lane unsigned reciprocal estimate of A (URECPE).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrecpe_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("urecpe %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14580 
/* Scalar double-precision reciprocal estimate of A (FRECPE, D register).  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecped_f64 (float64_t a)
{
  float64_t result;
  __asm__ ("frecpe %d0,%d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14591 
/* Per-lane floating-point reciprocal estimate of A, 128-bit form (FRECPE).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpeq_f32 (float32x4_t a)
{
  float32x4_t result;
  __asm__ ("frecpe %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14602 
/* Per-lane double-precision reciprocal estimate of A (FRECPE).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpeq_f64 (float64x2_t a)
{
  float64x2_t result;
  __asm__ ("frecpe %0.2d,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14613 
/* Per-lane unsigned reciprocal estimate of A, 128-bit form (URECPE).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrecpeq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("urecpe %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14624 
/* Scalar single-precision reciprocal estimate of A (FRECPE, S register).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpes_f32 (float32_t a)
{
  float32_t result;
  __asm__ ("frecpe %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14635 
/* Per-lane reciprocal Newton-Raphson refinement step on A, B (FRECPS).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecps_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("frecps %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
14646 
/* Scalar double-precision reciprocal refinement step on A, B (FRECPS).  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpsd_f64 (float64_t a, float64_t b)
{
  float64_t result;
  __asm__ ("frecps %d0,%d1,%d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
14657 
/* Per-lane reciprocal refinement step on A, B, 128-bit form (FRECPS).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpsq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("frecps %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
14668 
/* Per-lane double-precision reciprocal refinement step on A, B (FRECPS).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpsq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("frecps %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
14679 
/* Scalar single-precision reciprocal refinement step on A, B (FRECPS).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpss_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("frecps %s0,%s1,%s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
14690 
14691 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpxd_f64(float64_t a)14692 vrecpxd_f64 (float64_t a)
14693 {
14694   float64_t result;
14695   __asm__ ("frecpe %d0,%d1"
14696            : "=w"(result)
14697            : "w"(a)
14698            : /* No clobbers */);
14699   return result;
14700 }
14701 
14702 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpxs_f32(float32_t a)14703 vrecpxs_f32 (float32_t a)
14704 {
14705   float32_t result;
14706   __asm__ ("frecpe %s0,%s1"
14707            : "=w"(result)
14708            : "w"(a)
14709            : /* No clobbers */);
14710   return result;
14711 }
14712 
/* Reverse the bytes within each 16-bit halfword of A (REV16).  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrev16_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("rev16 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14723 
/* Reverse the bytes within each 16-bit halfword of A (REV16).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrev16_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("rev16 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14734 
/* Reverse the bytes within each 16-bit halfword of A (REV16).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrev16_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("rev16 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14745 
/* Reverse the bytes within each 16-bit halfword of A, 128-bit form (REV16).  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrev16q_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("rev16 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14756 
/* Reverse the bytes within each 16-bit halfword of A, 128-bit form (REV16).  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrev16q_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("rev16 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14767 
/* Reverse the bytes within each 16-bit halfword of A, 128-bit form (REV16).  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrev16q_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("rev16 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14778 
/* Reverse the elements within each 32-bit word of A (REV32).  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrev32_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("rev32 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14789 
/* Reverse the elements within each 32-bit word of A (REV32).  */
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vrev32_p16 (poly16x4_t a)
{
  poly16x4_t result;
  __asm__ ("rev32 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14800 
/* Reverse the elements within each 32-bit word of A (REV32).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrev32_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("rev32 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14811 
/* Reverse the elements within each 32-bit word of A (REV32).  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrev32_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("rev32 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14822 
/* Reverse the elements within each 32-bit word of A (REV32).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrev32_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("rev32 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14833 
/* Reverse the elements within each 32-bit word of A (REV32).  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrev32_u16 (uint16x4_t a)
{
  uint16x4_t result;
  __asm__ ("rev32 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14844 
/* Reverse the elements within each 32-bit word of A, 128-bit form (REV32).  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrev32q_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("rev32 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14855 
/* Reverse the elements within each 32-bit word of A, 128-bit form (REV32).  */
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vrev32q_p16 (poly16x8_t a)
{
  poly16x8_t result;
  __asm__ ("rev32 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14866 
/* Reverse the elements within each 32-bit word of A, 128-bit form (REV32).  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrev32q_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("rev32 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14877 
/* Reverse the elements within each 32-bit word of A, 128-bit form (REV32).  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrev32q_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("rev32 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14888 
/* Reverse the elements within each 32-bit word of A, 128-bit form (REV32).  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrev32q_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("rev32 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14899 
/* Reverse the elements within each 32-bit word of A, 128-bit form (REV32).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrev32q_u16 (uint16x8_t a)
{
  uint16x8_t result;
  __asm__ ("rev32 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14910 
/* Reverse the elements within each 64-bit doubleword of A (REV64).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrev64_f32 (float32x2_t a)
{
  float32x2_t result;
  __asm__ ("rev64 %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14921 
/* Reverse the elements within each 64-bit doubleword of A (REV64).  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrev64_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("rev64 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14932 
/* Reverse the elements within each 64-bit doubleword of A (REV64).  */
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vrev64_p16 (poly16x4_t a)
{
  poly16x4_t result;
  __asm__ ("rev64 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14943 
/* Reverse the elements within each 64-bit doubleword of A (REV64).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrev64_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("rev64 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14954 
/* Reverse the elements within each 64-bit doubleword of A (REV64).  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrev64_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("rev64 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14965 
/* Reverse the elements within each 64-bit doubleword of A (REV64).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrev64_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("rev64 %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14976 
/* Reverse the elements within each 64-bit doubleword of A (REV64).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrev64_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("rev64 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14987 
/* Reverse the elements within each 64-bit doubleword of A (REV64).  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrev64_u16 (uint16x4_t a)
{
  uint16x4_t result;
  __asm__ ("rev64 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
14998 
/* Reverse the elements within each 64-bit doubleword of A (REV64).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrev64_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("rev64 %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15009 
/* Reverse the elements in each 64-bit doubleword of A, 128-bit form (REV64).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrev64q_f32 (float32x4_t a)
{
  float32x4_t result;
  __asm__ ("rev64 %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15020 
/* Reverse the elements in each 64-bit doubleword of A, 128-bit form (REV64).  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrev64q_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("rev64 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15031 
/* Reverse the elements in each 64-bit doubleword of A, 128-bit form (REV64).  */
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vrev64q_p16 (poly16x8_t a)
{
  poly16x8_t result;
  __asm__ ("rev64 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15042 
/* Reverse the elements in each 64-bit doubleword of A, 128-bit form (REV64).  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrev64q_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("rev64 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15053 
/* Reverse the elements in each 64-bit doubleword of A, 128-bit form (REV64).  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrev64q_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("rev64 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15064 
/* Reverse the elements in each 64-bit doubleword of A, 128-bit form (REV64).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrev64q_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("rev64 %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15075 
/* Reverse the elements in each 64-bit doubleword of A, 128-bit form (REV64).  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrev64q_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("rev64 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15086 
/* Reverse the elements in each 64-bit doubleword of A, 128-bit form (REV64).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrev64q_u16 (uint16x8_t a)
{
  uint16x8_t result;
  __asm__ ("rev64 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15097 
/* Reverse the elements in each 64-bit doubleword of A, 128-bit form (REV64).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrev64q_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("rev64 %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15108 
/* Per-lane round to integral, toward zero (FRINTZ).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnd_f32 (float32x2_t a)
{
  float32x2_t result;
  __asm__ ("frintz %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15119 
/* Per-lane round to integral, to nearest with ties away from zero (FRINTA).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnda_f32 (float32x2_t a)
{
  float32x2_t result;
  __asm__ ("frinta %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15130 
/* Per-lane round to integral, toward minus infinity (FRINTM).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndm_f32 (float32x2_t a)
{
  float32x2_t result;
  __asm__ ("frintm %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15141 
/* Per-lane round to integral, to nearest with ties to even (FRINTN).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndn_f32 (float32x2_t a)
{
  float32x2_t result;
  __asm__ ("frintn %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15152 
/* Per-lane round to integral, toward plus infinity (FRINTP).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndp_f32 (float32x2_t a)
{
  float32x2_t result;
  __asm__ ("frintp %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15163 
/* Per-lane round to integral, toward zero, 128-bit form (FRINTZ).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndq_f32 (float32x4_t a)
{
  float32x4_t result;
  __asm__ ("frintz %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15174 
/* Per-lane round to integral, toward zero, 128-bit form (FRINTZ).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndq_f64 (float64x2_t a)
{
  float64x2_t result;
  __asm__ ("frintz %0.2d,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15185 
/* Per-lane round to nearest, ties away from zero, 128-bit form (FRINTA).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndqa_f32 (float32x4_t a)
{
  float32x4_t result;
  __asm__ ("frinta %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15196 
/* Per-lane round to nearest, ties away from zero, 128-bit form (FRINTA).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndqa_f64 (float64x2_t a)
{
  float64x2_t result;
  __asm__ ("frinta %0.2d,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15207 
/* Per-lane round toward minus infinity, 128-bit form (FRINTM).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndqm_f32 (float32x4_t a)
{
  float32x4_t result;
  __asm__ ("frintm %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15218 
/* Per-lane round toward minus infinity, 128-bit form (FRINTM).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndqm_f64 (float64x2_t a)
{
  float64x2_t result;
  __asm__ ("frintm %0.2d,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15229 
/* Per-lane round to nearest, ties to even, 128-bit form (FRINTN).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndqn_f32 (float32x4_t a)
{
  float32x4_t result;
  __asm__ ("frintn %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15240 
/* Per-lane round to nearest, ties to even, 128-bit form (FRINTN).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndqn_f64 (float64x2_t a)
{
  float64x2_t result;
  __asm__ ("frintn %0.2d,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15251 
/* Per-lane round toward plus infinity, 128-bit form (FRINTP).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndqp_f32 (float32x4_t a)
{
  float32x4_t result;
  __asm__ ("frintp %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15262 
/* Per-lane round toward plus infinity, 128-bit form (FRINTP).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndqp_f64 (float64x2_t a)
{
  float64x2_t result;
  __asm__ ("frintp %0.2d,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15273 
/* Rounding shift-right-narrow of B by immediate C (RSHRN2) into the high
   half of the result; low half comes from A.  */
#define vrshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8 (UINT64_C (0x0)));          \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15287 
/* Rounding shift-right-narrow of B by immediate C (RSHRN2) into the high
   half of the result; low half comes from A.  */
#define vrshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16 (UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15301 
/* Rounding shift-right-narrow of B by immediate C (RSHRN2) into the high
   half of the result; low half comes from A.  */
#define vrshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32 (UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15315 
/* vrshrn_high_n_u16: unsigned variant of the rounding shift-right-narrow
   high-part operation; RSHRN2 fills the upper half, A supplies the lower
   half.  C must be a constant ("i").  */
#define vrshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                            (a_, vcreate_u8 (UINT64_C (0x0)));          \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15329 
/* vrshrn_high_n_u32: unsigned variant of the rounding shift-right-narrow
   high-part operation; RSHRN2 fills the upper half, A supplies the lower
   half.  C must be a constant ("i").  */
#define vrshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                            (a_, vcreate_u16 (UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15343 
/* vrshrn_high_n_u64: unsigned variant of the rounding shift-right-narrow
   high-part operation; RSHRN2 fills the upper half, A supplies the lower
   half.  C must be a constant ("i").  */
#define vrshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                            (a_, vcreate_u32 (UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15357 
/* vrshrn_n_s16: RSHRN — rounding shift right by immediate B, narrowing
   each 16-bit lane of A to 8 bits.  B must be a constant ("i").  */
#define vrshrn_n_s16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15369 
/* vrshrn_n_s32: RSHRN — rounding shift right by immediate B, narrowing
   each 32-bit lane of A to 16 bits.  B must be a constant ("i").  */
#define vrshrn_n_s32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15381 
/* vrshrn_n_s64: RSHRN — rounding shift right by immediate B, narrowing
   each 64-bit lane of A to 32 bits.  B must be a constant ("i").  */
#define vrshrn_n_s64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15393 
/* vrshrn_n_u16: unsigned variant of RSHRN — rounding shift right by
   immediate B, narrowing each 16-bit lane of A to 8 bits.  */
#define vrshrn_n_u16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15405 
/* vrshrn_n_u32: unsigned variant of RSHRN — rounding shift right by
   immediate B, narrowing each 32-bit lane of A to 16 bits.  */
#define vrshrn_n_u32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15417 
/* vrshrn_n_u64: unsigned variant of RSHRN — rounding shift right by
   immediate B, narrowing each 64-bit lane of A to 32 bits.  */
#define vrshrn_n_u64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15429 
15430 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrsqrte_f32(float32x2_t a)15431 vrsqrte_f32 (float32x2_t a)
15432 {
15433   float32x2_t result;
15434   __asm__ ("frsqrte %0.2s,%1.2s"
15435            : "=w"(result)
15436            : "w"(a)
15437            : /* No clobbers */);
15438   return result;
15439 }
15440 
/* NOTE(review): despite the non-"q" name, this takes and returns
   float64x2_t and emits the same ".2d" (two-lane) form as vrsqrteq_f64
   below; ACLE defines vrsqrte_f64 on a 1-lane float64x1_t — TODO confirm.
   Kept as-is: changing the signature would break existing callers.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrsqrte_f64 (float64x2_t a)
{
  /* FRSQRTE: reciprocal square-root estimate, two float64 lanes.  */
  float64x2_t result;
  __asm__ ("frsqrte %0.2d,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
15451 
15452 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsqrte_u32(uint32x2_t a)15453 vrsqrte_u32 (uint32x2_t a)
15454 {
15455   uint32x2_t result;
15456   __asm__ ("ursqrte %0.2s,%1.2s"
15457            : "=w"(result)
15458            : "w"(a)
15459            : /* No clobbers */);
15460   return result;
15461 }
15462 
15463 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrsqrted_f64(float64_t a)15464 vrsqrted_f64 (float64_t a)
15465 {
15466   float64_t result;
15467   __asm__ ("frsqrte %d0,%d1"
15468            : "=w"(result)
15469            : "w"(a)
15470            : /* No clobbers */);
15471   return result;
15472 }
15473 
15474 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrsqrteq_f32(float32x4_t a)15475 vrsqrteq_f32 (float32x4_t a)
15476 {
15477   float32x4_t result;
15478   __asm__ ("frsqrte %0.4s,%1.4s"
15479            : "=w"(result)
15480            : "w"(a)
15481            : /* No clobbers */);
15482   return result;
15483 }
15484 
15485 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrsqrteq_f64(float64x2_t a)15486 vrsqrteq_f64 (float64x2_t a)
15487 {
15488   float64x2_t result;
15489   __asm__ ("frsqrte %0.2d,%1.2d"
15490            : "=w"(result)
15491            : "w"(a)
15492            : /* No clobbers */);
15493   return result;
15494 }
15495 
15496 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsqrteq_u32(uint32x4_t a)15497 vrsqrteq_u32 (uint32x4_t a)
15498 {
15499   uint32x4_t result;
15500   __asm__ ("ursqrte %0.4s,%1.4s"
15501            : "=w"(result)
15502            : "w"(a)
15503            : /* No clobbers */);
15504   return result;
15505 }
15506 
15507 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrsqrtes_f32(float32_t a)15508 vrsqrtes_f32 (float32_t a)
15509 {
15510   float32_t result;
15511   __asm__ ("frsqrte %s0,%s1"
15512            : "=w"(result)
15513            : "w"(a)
15514            : /* No clobbers */);
15515   return result;
15516 }
15517 
15518 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrsqrts_f32(float32x2_t a,float32x2_t b)15519 vrsqrts_f32 (float32x2_t a, float32x2_t b)
15520 {
15521   float32x2_t result;
15522   __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
15523            : "=w"(result)
15524            : "w"(a), "w"(b)
15525            : /* No clobbers */);
15526   return result;
15527 }
15528 
15529 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrsqrtsd_f64(float64_t a,float64_t b)15530 vrsqrtsd_f64 (float64_t a, float64_t b)
15531 {
15532   float64_t result;
15533   __asm__ ("frsqrts %d0,%d1,%d2"
15534            : "=w"(result)
15535            : "w"(a), "w"(b)
15536            : /* No clobbers */);
15537   return result;
15538 }
15539 
15540 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrsqrtsq_f32(float32x4_t a,float32x4_t b)15541 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
15542 {
15543   float32x4_t result;
15544   __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
15545            : "=w"(result)
15546            : "w"(a), "w"(b)
15547            : /* No clobbers */);
15548   return result;
15549 }
15550 
15551 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrsqrtsq_f64(float64x2_t a,float64x2_t b)15552 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
15553 {
15554   float64x2_t result;
15555   __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
15556            : "=w"(result)
15557            : "w"(a), "w"(b)
15558            : /* No clobbers */);
15559   return result;
15560 }
15561 
15562 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrsqrtss_f32(float32_t a,float32_t b)15563 vrsqrtss_f32 (float32_t a, float32_t b)
15564 {
15565   float32_t result;
15566   __asm__ ("frsqrts %s0,%s1,%s2"
15567            : "=w"(result)
15568            : "w"(a), "w"(b)
15569            : /* No clobbers */);
15570   return result;
15571 }
15572 
/* NOTE(review): "vrsrtsq_f64" looks like a misspelling of vrsqrtsq_f64,
   whose body above is identical.  Retained as-is because removing or
   renaming it would break any caller already using this name.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrsrtsq_f64 (float64x2_t a, float64x2_t b)
{
  /* FRSQRTS: reciprocal square-root step, two float64 lanes.  */
  float64x2_t result;
  __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
15583 
15584 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsubhn_high_s16(int8x8_t a,int16x8_t b,int16x8_t c)15585 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
15586 {
15587   int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
15588   __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
15589            : "+w"(result)
15590            : "w"(b), "w"(c)
15591            : /* No clobbers */);
15592   return result;
15593 }
15594 
15595 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsubhn_high_s32(int16x4_t a,int32x4_t b,int32x4_t c)15596 vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
15597 {
15598   int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
15599   __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
15600            : "+w"(result)
15601            : "w"(b), "w"(c)
15602            : /* No clobbers */);
15603   return result;
15604 }
15605 
15606 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsubhn_high_s64(int32x2_t a,int64x2_t b,int64x2_t c)15607 vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
15608 {
15609   int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
15610   __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
15611            : "+w"(result)
15612            : "w"(b), "w"(c)
15613            : /* No clobbers */);
15614   return result;
15615 }
15616 
15617 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsubhn_high_u16(uint8x8_t a,uint16x8_t b,uint16x8_t c)15618 vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
15619 {
15620   uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
15621   __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
15622            : "+w"(result)
15623            : "w"(b), "w"(c)
15624            : /* No clobbers */);
15625   return result;
15626 }
15627 
15628 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsubhn_high_u32(uint16x4_t a,uint32x4_t b,uint32x4_t c)15629 vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
15630 {
15631   uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
15632   __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
15633            : "+w"(result)
15634            : "w"(b), "w"(c)
15635            : /* No clobbers */);
15636   return result;
15637 }
15638 
15639 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsubhn_high_u64(uint32x2_t a,uint64x2_t b,uint64x2_t c)15640 vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
15641 {
15642   uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
15643   __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
15644            : "+w"(result)
15645            : "w"(b), "w"(c)
15646            : /* No clobbers */);
15647   return result;
15648 }
15649 
15650 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrsubhn_s16(int16x8_t a,int16x8_t b)15651 vrsubhn_s16 (int16x8_t a, int16x8_t b)
15652 {
15653   int8x8_t result;
15654   __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
15655            : "=w"(result)
15656            : "w"(a), "w"(b)
15657            : /* No clobbers */);
15658   return result;
15659 }
15660 
15661 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrsubhn_s32(int32x4_t a,int32x4_t b)15662 vrsubhn_s32 (int32x4_t a, int32x4_t b)
15663 {
15664   int16x4_t result;
15665   __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
15666            : "=w"(result)
15667            : "w"(a), "w"(b)
15668            : /* No clobbers */);
15669   return result;
15670 }
15671 
15672 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrsubhn_s64(int64x2_t a,int64x2_t b)15673 vrsubhn_s64 (int64x2_t a, int64x2_t b)
15674 {
15675   int32x2_t result;
15676   __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
15677            : "=w"(result)
15678            : "w"(a), "w"(b)
15679            : /* No clobbers */);
15680   return result;
15681 }
15682 
15683 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrsubhn_u16(uint16x8_t a,uint16x8_t b)15684 vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
15685 {
15686   uint8x8_t result;
15687   __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
15688            : "=w"(result)
15689            : "w"(a), "w"(b)
15690            : /* No clobbers */);
15691   return result;
15692 }
15693 
15694 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrsubhn_u32(uint32x4_t a,uint32x4_t b)15695 vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
15696 {
15697   uint16x4_t result;
15698   __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
15699            : "=w"(result)
15700            : "w"(a), "w"(b)
15701            : /* No clobbers */);
15702   return result;
15703 }
15704 
15705 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsubhn_u64(uint64x2_t a,uint64x2_t b)15706 vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
15707 {
15708   uint32x2_t result;
15709   __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
15710            : "=w"(result)
15711            : "w"(a), "w"(b)
15712            : /* No clobbers */);
15713   return result;
15714 }
15715 
/* vset_lane_f32: INS inserts scalar A into lane C of vector B; the "0"
   matching constraint preloads the output register with B so the other
   lanes are preserved.  C must be a constant ("i").  */
#define vset_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32x2_t result;                                              \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15728 
/* vset_lane_f64: INS inserts scalar A into lane C of the one-element
   vector B ("0" ties the output to B).  C must be a constant ("i").  */
#define vset_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64x1_t result;                                              \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15741 
/* vset_lane_p8: INS inserts scalar A into byte lane C of B ("0" ties the
   output to B so other lanes survive).  C must be a constant ("i").  */
#define vset_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8_t a_ = (a);                                                \
       poly8x8_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15754 
/* vset_lane_p16: INS inserts scalar A into halfword lane C of B ("0" ties
   the output to B).  C must be a constant ("i").  */
#define vset_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16_t a_ = (a);                                               \
       poly16x4_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15767 
/* vset_lane_s8: INS inserts scalar A into byte lane C of B ("0" ties the
   output to B).  C must be a constant ("i").  */
#define vset_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8_t a_ = (a);                                                 \
       int8x8_t result;                                                 \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15780 
/* vset_lane_s16: INS inserts scalar A into halfword lane C of B ("0" ties
   the output to B).  C must be a constant ("i").  */
#define vset_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16_t a_ = (a);                                                \
       int16x4_t result;                                                \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15793 
/* vset_lane_s32: INS inserts scalar A into word lane C of B ("0" ties the
   output to B).  C must be a constant ("i").  */
#define vset_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32_t a_ = (a);                                                \
       int32x2_t result;                                                \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15806 
/* vset_lane_s64: INS inserts scalar A into doubleword lane C of the
   one-element vector B ("0" ties the output to B).  C must be constant.  */
#define vset_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64_t a_ = (a);                                                \
       int64x1_t result;                                                \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15819 
/* vset_lane_u8: INS inserts scalar A into byte lane C of B ("0" ties the
   output to B).  C must be a constant ("i").  */
#define vset_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       uint8_t a_ = (a);                                                \
       uint8x8_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15832 
/* vset_lane_u16: INS inserts scalar A into halfword lane C of B ("0" ties
   the output to B).  C must be a constant ("i").  */
#define vset_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16_t a_ = (a);                                               \
       uint16x4_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15845 
/* vset_lane_u32: INS inserts scalar A into word lane C of B ("0" ties the
   output to B).  C must be a constant ("i").  */
#define vset_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32_t a_ = (a);                                               \
       uint32x2_t result;                                               \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15858 
/* vset_lane_u64: INS inserts scalar A into doubleword lane C of the
   one-element vector B ("0" ties the output to B).  C must be constant.  */
#define vset_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       uint64_t a_ = (a);                                               \
       uint64x1_t result;                                               \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15871 
/* vsetq_lane_f32: 128-bit variant — INS inserts scalar A into lane C of
   B ("0" ties the output to B).  C must be a constant ("i").  */
#define vsetq_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32x4_t result;                                              \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15884 
/* vsetq_lane_f64: 128-bit variant — INS inserts scalar A into doubleword
   lane C of B ("0" ties the output to B).  C must be a constant ("i").  */
#define vsetq_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64x2_t result;                                              \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15897 
/* vsetq_lane_p8: 128-bit variant — INS inserts scalar A into byte lane C
   of B ("0" ties the output to B).  C must be a constant ("i").  */
#define vsetq_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8_t a_ = (a);                                                \
       poly8x16_t result;                                               \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
15910 
15911 #define vsetq_lane_p16(a, b, c)                                         \
15912   __extension__                                                         \
15913     ({                                                                  \
15914        poly16x8_t b_ = (b);                                             \
15915        poly16_t a_ = (a);                                               \
15916        poly16x8_t result;                                               \
15917        __asm__ ("ins %0.h[%3], %w1"                                     \
15918                 : "=w"(result)                                          \
15919                 : "r"(a_), "0"(b_), "i"(c)                              \
15920                 : /* No clobbers */);                                   \
15921        result;                                                          \
15922      })
15923 
15924 #define vsetq_lane_s8(a, b, c)                                          \
15925   __extension__                                                         \
15926     ({                                                                  \
15927        int8x16_t b_ = (b);                                              \
15928        int8_t a_ = (a);                                                 \
15929        int8x16_t result;                                                \
15930        __asm__ ("ins %0.b[%3], %w1"                                     \
15931                 : "=w"(result)                                          \
15932                 : "r"(a_), "0"(b_), "i"(c)                              \
15933                 : /* No clobbers */);                                   \
15934        result;                                                          \
15935      })
15936 
15937 #define vsetq_lane_s16(a, b, c)                                         \
15938   __extension__                                                         \
15939     ({                                                                  \
15940        int16x8_t b_ = (b);                                              \
15941        int16_t a_ = (a);                                                \
15942        int16x8_t result;                                                \
15943        __asm__ ("ins %0.h[%3], %w1"                                     \
15944                 : "=w"(result)                                          \
15945                 : "r"(a_), "0"(b_), "i"(c)                              \
15946                 : /* No clobbers */);                                   \
15947        result;                                                          \
15948      })
15949 
15950 #define vsetq_lane_s32(a, b, c)                                         \
15951   __extension__                                                         \
15952     ({                                                                  \
15953        int32x4_t b_ = (b);                                              \
15954        int32_t a_ = (a);                                                \
15955        int32x4_t result;                                                \
15956        __asm__ ("ins %0.s[%3], %w1"                                     \
15957                 : "=w"(result)                                          \
15958                 : "r"(a_), "0"(b_), "i"(c)                              \
15959                 : /* No clobbers */);                                   \
15960        result;                                                          \
15961      })
15962 
15963 #define vsetq_lane_s64(a, b, c)                                         \
15964   __extension__                                                         \
15965     ({                                                                  \
15966        int64x2_t b_ = (b);                                              \
15967        int64_t a_ = (a);                                                \
15968        int64x2_t result;                                                \
15969        __asm__ ("ins %0.d[%3], %x1"                                     \
15970                 : "=w"(result)                                          \
15971                 : "r"(a_), "0"(b_), "i"(c)                              \
15972                 : /* No clobbers */);                                   \
15973        result;                                                          \
15974      })
15975 
15976 #define vsetq_lane_u8(a, b, c)                                          \
15977   __extension__                                                         \
15978     ({                                                                  \
15979        uint8x16_t b_ = (b);                                             \
15980        uint8_t a_ = (a);                                                \
15981        uint8x16_t result;                                               \
15982        __asm__ ("ins %0.b[%3], %w1"                                     \
15983                 : "=w"(result)                                          \
15984                 : "r"(a_), "0"(b_), "i"(c)                              \
15985                 : /* No clobbers */);                                   \
15986        result;                                                          \
15987      })
15988 
15989 #define vsetq_lane_u16(a, b, c)                                         \
15990   __extension__                                                         \
15991     ({                                                                  \
15992        uint16x8_t b_ = (b);                                             \
15993        uint16_t a_ = (a);                                               \
15994        uint16x8_t result;                                               \
15995        __asm__ ("ins %0.h[%3], %w1"                                     \
15996                 : "=w"(result)                                          \
15997                 : "r"(a_), "0"(b_), "i"(c)                              \
15998                 : /* No clobbers */);                                   \
15999        result;                                                          \
16000      })
16001 
16002 #define vsetq_lane_u32(a, b, c)                                         \
16003   __extension__                                                         \
16004     ({                                                                  \
16005        uint32x4_t b_ = (b);                                             \
16006        uint32_t a_ = (a);                                               \
16007        uint32x4_t result;                                               \
16008        __asm__ ("ins %0.s[%3], %w1"                                     \
16009                 : "=w"(result)                                          \
16010                 : "r"(a_), "0"(b_), "i"(c)                              \
16011                 : /* No clobbers */);                                   \
16012        result;                                                          \
16013      })
16014 
16015 #define vsetq_lane_u64(a, b, c)                                         \
16016   __extension__                                                         \
16017     ({                                                                  \
16018        uint64x2_t b_ = (b);                                             \
16019        uint64_t a_ = (a);                                               \
16020        uint64x2_t result;                                               \
16021        __asm__ ("ins %0.d[%3], %x1"                                     \
16022                 : "=w"(result)                                          \
16023                 : "r"(a_), "0"(b_), "i"(c)                              \
16024                 : /* No clobbers */);                                   \
16025        result;                                                          \
16026      })
16027 
/* vshrn_high_n_<t> (a, b, c): shift each element of the 128-bit
   vector B right by the constant C and narrow to half width, placing
   the narrowed elements in the HIGH half of the result; the LOW half
   is the 64-bit vector A.  RESULT is pre-built with vcombine (A in
   the low half, zeros above) and tied as a read/write operand ("+w")
   so that SHRN2 only fills in the upper half.  */

#define vshrn_high_n_s16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8 (UINT64_C (0x0)));          \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16 (UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32 (UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                            (a_, vcreate_u8 (UINT64_C (0x0)));          \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                            (a_, vcreate_u16 (UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                            (a_, vcreate_u32 (UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
16111 
/* vshrn_n_<t> (a, b): shift each element of the 128-bit vector A
   right by the constant B and narrow each result to half its width,
   yielding a 64-bit vector (SHRN).  B must be an integer constant
   expression ("i" constraint).  */

#define vshrn_n_s16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
16183 
/* vsli_n_<t> / vsliq_n_<t> (a, b, c): shift-left-and-insert (SLI).
   Each element of B is shifted left by the constant C and inserted
   into the corresponding element of A; bits of A below the inserted
   field are preserved.  A is tied to the result register with the
   "0" matching constraint so the insertion happens in place.  */

#define vsli_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sli %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsli_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sli %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sli %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sli %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
16235 
/* vsri_n_<t> / vsriq_n_<t> (a, b, c): shift-right-and-insert (SRI).
   Each element of B is shifted right by the constant C and inserted
   into the corresponding element of A; bits of A above the inserted
   field are preserved.  A is tied to the result register with the
   "0" matching constraint so the insertion happens in place.  */

#define vsri_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sri %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsri_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sri %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sri %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sri %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
16287 
16288 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f32(float32_t * a,float32x2_t b)16289 vst1_f32 (float32_t * a, float32x2_t b)
16290 {
16291   __asm__ ("st1 {%1.2s},[%0]"
16292            :
16293            : "r"(a), "w"(b)
16294            : "memory");
16295 }
16296 
16297 __extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f64(float64_t * a,float64x1_t b)16298 vst1_f64 (float64_t * a, float64x1_t b)
16299 {
16300   __asm__ ("st1 {%1.1d},[%0]"
16301            :
16302            : "r"(a), "w"(b)
16303            : "memory");
16304 }
16305 
/* vst1_lane_<t> (a, b, c): store lane C of vector B to the scalar
   address A.  C must be an integer constant expression ("i"
   constraint); the "memory" clobber accounts for the store through A
   that the compiler cannot see.  */

#define vst1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8_t * a_ = (a);                                               \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       uint8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })
16404 
16405 #define vst1_lane_u16(a, b, c)                                          \
16406   __extension__                                                         \
16407     ({                                                                  \
16408        uint16x4_t b_ = (b);                                             \
16409        uint16_t * a_ = (a);                                             \
16410        __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
16411                 :                                                       \
16412                 : "r"(a_), "w"(b_), "i"(c)                              \
16413                 : "memory");                                            \
16414      })
16415 
16416 #define vst1_lane_u32(a, b, c)                                          \
16417   __extension__                                                         \
16418     ({                                                                  \
16419        uint32x2_t b_ = (b);                                             \
16420        uint32_t * a_ = (a);                                             \
16421        __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
16422                 :                                                       \
16423                 : "r"(a_), "w"(b_), "i"(c)                              \
16424                 : "memory");                                            \
16425      })
16426 
16427 #define vst1_lane_u64(a, b, c)                                          \
16428   __extension__                                                         \
16429     ({                                                                  \
16430        uint64x1_t b_ = (b);                                             \
16431        uint64_t * a_ = (a);                                             \
16432        __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
16433                 :                                                       \
16434                 : "r"(a_), "w"(b_), "i"(c)                              \
16435                 : "memory");                                            \
16436      })
16437 
/* vst1_<t> (a, b): store every element of the 64-bit vector `b` to the
   address `a` (whole-register form of ST1; the .8b/.4h/.2s/.1d
   arrangement matches the element width of <t>).  vst1q_f32/vst1q_f64
   below are the corresponding 128-bit (quad-register) forms.  The
   "memory" clobber is needed because the store is invisible to the
   compiler through the operand list.  */

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_p8 (poly8_t * a, poly8x8_t b)
{
  __asm__ ("st1 {%1.8b},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_p16 (poly16_t * a, poly16x4_t b)
{
  __asm__ ("st1 {%1.4h},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s8 (int8_t * a, int8x8_t b)
{
  __asm__ ("st1 {%1.8b},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s16 (int16_t * a, int16x4_t b)
{
  __asm__ ("st1 {%1.4h},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s32 (int32_t * a, int32x2_t b)
{
  __asm__ ("st1 {%1.2s},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s64 (int64_t * a, int64x1_t b)
{
  __asm__ ("st1 {%1.1d},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u8 (uint8_t * a, uint8x8_t b)
{
  __asm__ ("st1 {%1.8b},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u16 (uint16_t * a, uint16x4_t b)
{
  __asm__ ("st1 {%1.4h},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u32 (uint32_t * a, uint32x2_t b)
{
  __asm__ ("st1 {%1.2s},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u64 (uint64_t * a, uint64x1_t b)
{
  __asm__ ("st1 {%1.1d},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

/* 128-bit store: writes all four float lanes of `b` to `a`.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f32 (float32_t * a, float32x4_t b)
{
  __asm__ ("st1 {%1.4s},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

/* 128-bit store: writes both double lanes of `b` to `a`.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f64 (float64_t * a, float64x2_t b)
{
  __asm__ ("st1 {%1.2d},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}
16545 
/* vst1q_lane_<t> (a, b, c): store lane `c` of the 128-bit vector `b`
   to the address `a`, using the single-lane form of ST1.  Macros (not
   inline functions) so the lane index `c` can be matched by the "i"
   (constant immediate) asm constraint; `c` must be a compile-time
   constant.  The "memory" clobber is required because the store is not
   expressed through the asm operands.
   NOTE(review): the lane index is not range-checked here; an
   out-of-range value is only caught by the assembler.  */

#define vst1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       int8_t * a_ = (a);                                               \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int64_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       uint8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint64_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })
16677 
/* vst1q_<t> (a, b): store every element of the 128-bit (quad) vector
   `b` to the address `a` (whole-register ST1; the .16b/.8h/.4s/.2d
   arrangement matches the element width of <t>).  The "memory" clobber
   is needed because the store is invisible to the compiler through the
   operand list.  */

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_p8 (poly8_t * a, poly8x16_t b)
{
  __asm__ ("st1 {%1.16b},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_p16 (poly16_t * a, poly16x8_t b)
{
  __asm__ ("st1 {%1.8h},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s8 (int8_t * a, int8x16_t b)
{
  __asm__ ("st1 {%1.16b},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s16 (int16_t * a, int16x8_t b)
{
  __asm__ ("st1 {%1.8h},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s32 (int32_t * a, int32x4_t b)
{
  __asm__ ("st1 {%1.4s},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s64 (int64_t * a, int64x2_t b)
{
  __asm__ ("st1 {%1.2d},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u8 (uint8_t * a, uint8x16_t b)
{
  __asm__ ("st1 {%1.16b},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u16 (uint16_t * a, uint16x8_t b)
{
  __asm__ ("st1 {%1.8h},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u32 (uint32_t * a, uint32x4_t b)
{
  __asm__ ("st1 {%1.4s},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u64 (uint64_t * a, uint64x2_t b)
{
  __asm__ ("st1 {%1.2d},[%0]"
           :
           : "r"(a), "w"(b)
           : "memory");
}
16767 
/* vsubhn_high_<t> (a, b, c): subtract-high-narrow to the upper half.
   SUBHN2 computes the most-significant half of each (b[i] - c[i])
   element and writes those narrowed values into the UPPER half of the
   destination, leaving the lower half untouched.  The result register
   is therefore seeded with vcombine (a, 0) — `a` in the lower half —
   and passed with the read-write "+w" constraint so SUBHN2's partial
   write is modelled correctly.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
  __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
  __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
  __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
  __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
  __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
           : "+w"(result)
           : "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
16833 
/* vsubhn_<t> (a, b): subtract and narrow.  Each result element is the
   most-significant half of (a[i] - b[i]) (SUBHN), producing a 64-bit
   vector of half-width elements from two full-width inputs.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsubhn_s16 (int16x8_t a, int16x8_t b)
{
  int8x8_t result;
  __asm__ ("subhn %0.8b, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsubhn_s32 (int32x4_t a, int32x4_t b)
{
  int16x4_t result;
  __asm__ ("subhn %0.4h, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsubhn_s64 (int64x2_t a, int64x2_t b)
{
  int32x2_t result;
  __asm__ ("subhn %0.2s, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsubhn_u16 (uint16x8_t a, uint16x8_t b)
{
  uint8x8_t result;
  __asm__ ("subhn %0.8b, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsubhn_u32 (uint32x4_t a, uint32x4_t b)
{
  uint16x4_t result;
  __asm__ ("subhn %0.4h, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsubhn_u64 (uint64x2_t a, uint64x2_t b)
{
  uint32x2_t result;
  __asm__ ("subhn %0.2s, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
16899 
/* vtrn1_<t> (a, b): transpose-interleave of the EVEN-indexed elements
   (TRN1): result[2i] = a[2i], result[2i+1] = b[2i].  64-bit vector
   forms; paired with vtrn2_<t>, which takes the odd-indexed elements.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn1_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn1_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn1_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn1_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn1_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn1_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn1_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn1_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn1_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
16998 
/* vtrn1q_<t> (a, b): 128-bit (quad) forms of the even-element
   transpose-interleave (TRN1): result[2i] = a[2i],
   result[2i+1] = b[2i].  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn1q_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn1q_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
{
  poly8x16_t result;
  __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
{
  poly16x8_t result;
  __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn1q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn1q_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn1q_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn1q_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
17130 
17131 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn2_f32(float32x2_t a,float32x2_t b)17132 vtrn2_f32 (float32x2_t a, float32x2_t b)
17133 {
17134   float32x2_t result;
17135   __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
17136            : "=w"(result)
17137            : "w"(a), "w"(b)
17138            : /* No clobbers */);
17139   return result;
17140 }
17141 
17142 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn2_p8(poly8x8_t a,poly8x8_t b)17143 vtrn2_p8 (poly8x8_t a, poly8x8_t b)
17144 {
17145   poly8x8_t result;
17146   __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
17147            : "=w"(result)
17148            : "w"(a), "w"(b)
17149            : /* No clobbers */);
17150   return result;
17151 }
17152 
17153 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn2_p16(poly16x4_t a,poly16x4_t b)17154 vtrn2_p16 (poly16x4_t a, poly16x4_t b)
17155 {
17156   poly16x4_t result;
17157   __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
17158            : "=w"(result)
17159            : "w"(a), "w"(b)
17160            : /* No clobbers */);
17161   return result;
17162 }
17163 
17164 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn2_s8(int8x8_t a,int8x8_t b)17165 vtrn2_s8 (int8x8_t a, int8x8_t b)
17166 {
17167   int8x8_t result;
17168   __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
17169            : "=w"(result)
17170            : "w"(a), "w"(b)
17171            : /* No clobbers */);
17172   return result;
17173 }
17174 
17175 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn2_s16(int16x4_t a,int16x4_t b)17176 vtrn2_s16 (int16x4_t a, int16x4_t b)
17177 {
17178   int16x4_t result;
17179   __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
17180            : "=w"(result)
17181            : "w"(a), "w"(b)
17182            : /* No clobbers */);
17183   return result;
17184 }
17185 
17186 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn2_s32(int32x2_t a,int32x2_t b)17187 vtrn2_s32 (int32x2_t a, int32x2_t b)
17188 {
17189   int32x2_t result;
17190   __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
17191            : "=w"(result)
17192            : "w"(a), "w"(b)
17193            : /* No clobbers */);
17194   return result;
17195 }
17196 
17197 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn2_u8(uint8x8_t a,uint8x8_t b)17198 vtrn2_u8 (uint8x8_t a, uint8x8_t b)
17199 {
17200   uint8x8_t result;
17201   __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
17202            : "=w"(result)
17203            : "w"(a), "w"(b)
17204            : /* No clobbers */);
17205   return result;
17206 }
17207 
17208 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn2_u16(uint16x4_t a,uint16x4_t b)17209 vtrn2_u16 (uint16x4_t a, uint16x4_t b)
17210 {
17211   uint16x4_t result;
17212   __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
17213            : "=w"(result)
17214            : "w"(a), "w"(b)
17215            : /* No clobbers */);
17216   return result;
17217 }
17218 
17219 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn2_u32(uint32x2_t a,uint32x2_t b)17220 vtrn2_u32 (uint32x2_t a, uint32x2_t b)
17221 {
17222   uint32x2_t result;
17223   __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
17224            : "=w"(result)
17225            : "w"(a), "w"(b)
17226            : /* No clobbers */);
17227   return result;
17228 }
17229 
17230 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn2q_f32(float32x4_t a,float32x4_t b)17231 vtrn2q_f32 (float32x4_t a, float32x4_t b)
17232 {
17233   float32x4_t result;
17234   __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
17235            : "=w"(result)
17236            : "w"(a), "w"(b)
17237            : /* No clobbers */);
17238   return result;
17239 }
17240 
17241 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn2q_f64(float64x2_t a,float64x2_t b)17242 vtrn2q_f64 (float64x2_t a, float64x2_t b)
17243 {
17244   float64x2_t result;
17245   __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
17246            : "=w"(result)
17247            : "w"(a), "w"(b)
17248            : /* No clobbers */);
17249   return result;
17250 }
17251 
17252 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn2q_p8(poly8x16_t a,poly8x16_t b)17253 vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
17254 {
17255   poly8x16_t result;
17256   __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
17257            : "=w"(result)
17258            : "w"(a), "w"(b)
17259            : /* No clobbers */);
17260   return result;
17261 }
17262 
17263 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn2q_p16(poly16x8_t a,poly16x8_t b)17264 vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
17265 {
17266   poly16x8_t result;
17267   __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
17268            : "=w"(result)
17269            : "w"(a), "w"(b)
17270            : /* No clobbers */);
17271   return result;
17272 }
17273 
17274 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn2q_s8(int8x16_t a,int8x16_t b)17275 vtrn2q_s8 (int8x16_t a, int8x16_t b)
17276 {
17277   int8x16_t result;
17278   __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
17279            : "=w"(result)
17280            : "w"(a), "w"(b)
17281            : /* No clobbers */);
17282   return result;
17283 }
17284 
17285 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn2q_s16(int16x8_t a,int16x8_t b)17286 vtrn2q_s16 (int16x8_t a, int16x8_t b)
17287 {
17288   int16x8_t result;
17289   __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
17290            : "=w"(result)
17291            : "w"(a), "w"(b)
17292            : /* No clobbers */);
17293   return result;
17294 }
17295 
17296 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn2q_s32(int32x4_t a,int32x4_t b)17297 vtrn2q_s32 (int32x4_t a, int32x4_t b)
17298 {
17299   int32x4_t result;
17300   __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
17301            : "=w"(result)
17302            : "w"(a), "w"(b)
17303            : /* No clobbers */);
17304   return result;
17305 }
17306 
17307 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn2q_s64(int64x2_t a,int64x2_t b)17308 vtrn2q_s64 (int64x2_t a, int64x2_t b)
17309 {
17310   int64x2_t result;
17311   __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
17312            : "=w"(result)
17313            : "w"(a), "w"(b)
17314            : /* No clobbers */);
17315   return result;
17316 }
17317 
17318 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn2q_u8(uint8x16_t a,uint8x16_t b)17319 vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
17320 {
17321   uint8x16_t result;
17322   __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
17323            : "=w"(result)
17324            : "w"(a), "w"(b)
17325            : /* No clobbers */);
17326   return result;
17327 }
17328 
17329 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn2q_u16(uint16x8_t a,uint16x8_t b)17330 vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
17331 {
17332   uint16x8_t result;
17333   __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
17334            : "=w"(result)
17335            : "w"(a), "w"(b)
17336            : /* No clobbers */);
17337   return result;
17338 }
17339 
17340 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn2q_u32(uint32x4_t a,uint32x4_t b)17341 vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
17342 {
17343   uint32x4_t result;
17344   __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
17345            : "=w"(result)
17346            : "w"(a), "w"(b)
17347            : /* No clobbers */);
17348   return result;
17349 }
17350 
17351 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn2q_u64(uint64x2_t a,uint64x2_t b)17352 vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
17353 {
17354   uint64x2_t result;
17355   __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
17356            : "=w"(result)
17357            : "w"(a), "w"(b)
17358            : /* No clobbers */);
17359   return result;
17360 }
17361 
17362 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_p8(poly8x8_t a,poly8x8_t b)17363 vtst_p8 (poly8x8_t a, poly8x8_t b)
17364 {
17365   uint8x8_t result;
17366   __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
17367            : "=w"(result)
17368            : "w"(a), "w"(b)
17369            : /* No clobbers */);
17370   return result;
17371 }
17372 
17373 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_p16(poly16x4_t a,poly16x4_t b)17374 vtst_p16 (poly16x4_t a, poly16x4_t b)
17375 {
17376   uint16x4_t result;
17377   __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
17378            : "=w"(result)
17379            : "w"(a), "w"(b)
17380            : /* No clobbers */);
17381   return result;
17382 }
17383 
17384 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_p8(poly8x16_t a,poly8x16_t b)17385 vtstq_p8 (poly8x16_t a, poly8x16_t b)
17386 {
17387   uint8x16_t result;
17388   __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
17389            : "=w"(result)
17390            : "w"(a), "w"(b)
17391            : /* No clobbers */);
17392   return result;
17393 }
17394 
17395 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_p16(poly16x8_t a,poly16x8_t b)17396 vtstq_p16 (poly16x8_t a, poly16x8_t b)
17397 {
17398   uint16x8_t result;
17399   __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
17400            : "=w"(result)
17401            : "w"(a), "w"(b)
17402            : /* No clobbers */);
17403   return result;
17404 }
17405 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp1_f32(float32x2_t a,float32x2_t b)17406 vuzp1_f32 (float32x2_t a, float32x2_t b)
17407 {
17408   float32x2_t result;
17409   __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
17410            : "=w"(result)
17411            : "w"(a), "w"(b)
17412            : /* No clobbers */);
17413   return result;
17414 }
17415 
17416 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp1_p8(poly8x8_t a,poly8x8_t b)17417 vuzp1_p8 (poly8x8_t a, poly8x8_t b)
17418 {
17419   poly8x8_t result;
17420   __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
17421            : "=w"(result)
17422            : "w"(a), "w"(b)
17423            : /* No clobbers */);
17424   return result;
17425 }
17426 
17427 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp1_p16(poly16x4_t a,poly16x4_t b)17428 vuzp1_p16 (poly16x4_t a, poly16x4_t b)
17429 {
17430   poly16x4_t result;
17431   __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
17432            : "=w"(result)
17433            : "w"(a), "w"(b)
17434            : /* No clobbers */);
17435   return result;
17436 }
17437 
17438 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp1_s8(int8x8_t a,int8x8_t b)17439 vuzp1_s8 (int8x8_t a, int8x8_t b)
17440 {
17441   int8x8_t result;
17442   __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
17443            : "=w"(result)
17444            : "w"(a), "w"(b)
17445            : /* No clobbers */);
17446   return result;
17447 }
17448 
17449 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp1_s16(int16x4_t a,int16x4_t b)17450 vuzp1_s16 (int16x4_t a, int16x4_t b)
17451 {
17452   int16x4_t result;
17453   __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
17454            : "=w"(result)
17455            : "w"(a), "w"(b)
17456            : /* No clobbers */);
17457   return result;
17458 }
17459 
17460 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp1_s32(int32x2_t a,int32x2_t b)17461 vuzp1_s32 (int32x2_t a, int32x2_t b)
17462 {
17463   int32x2_t result;
17464   __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
17465            : "=w"(result)
17466            : "w"(a), "w"(b)
17467            : /* No clobbers */);
17468   return result;
17469 }
17470 
17471 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp1_u8(uint8x8_t a,uint8x8_t b)17472 vuzp1_u8 (uint8x8_t a, uint8x8_t b)
17473 {
17474   uint8x8_t result;
17475   __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
17476            : "=w"(result)
17477            : "w"(a), "w"(b)
17478            : /* No clobbers */);
17479   return result;
17480 }
17481 
17482 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp1_u16(uint16x4_t a,uint16x4_t b)17483 vuzp1_u16 (uint16x4_t a, uint16x4_t b)
17484 {
17485   uint16x4_t result;
17486   __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
17487            : "=w"(result)
17488            : "w"(a), "w"(b)
17489            : /* No clobbers */);
17490   return result;
17491 }
17492 
17493 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp1_u32(uint32x2_t a,uint32x2_t b)17494 vuzp1_u32 (uint32x2_t a, uint32x2_t b)
17495 {
17496   uint32x2_t result;
17497   __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
17498            : "=w"(result)
17499            : "w"(a), "w"(b)
17500            : /* No clobbers */);
17501   return result;
17502 }
17503 
17504 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp1q_f32(float32x4_t a,float32x4_t b)17505 vuzp1q_f32 (float32x4_t a, float32x4_t b)
17506 {
17507   float32x4_t result;
17508   __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
17509            : "=w"(result)
17510            : "w"(a), "w"(b)
17511            : /* No clobbers */);
17512   return result;
17513 }
17514 
17515 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp1q_f64(float64x2_t a,float64x2_t b)17516 vuzp1q_f64 (float64x2_t a, float64x2_t b)
17517 {
17518   float64x2_t result;
17519   __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
17520            : "=w"(result)
17521            : "w"(a), "w"(b)
17522            : /* No clobbers */);
17523   return result;
17524 }
17525 
17526 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp1q_p8(poly8x16_t a,poly8x16_t b)17527 vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
17528 {
17529   poly8x16_t result;
17530   __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
17531            : "=w"(result)
17532            : "w"(a), "w"(b)
17533            : /* No clobbers */);
17534   return result;
17535 }
17536 
17537 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp1q_p16(poly16x8_t a,poly16x8_t b)17538 vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
17539 {
17540   poly16x8_t result;
17541   __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
17542            : "=w"(result)
17543            : "w"(a), "w"(b)
17544            : /* No clobbers */);
17545   return result;
17546 }
17547 
17548 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp1q_s8(int8x16_t a,int8x16_t b)17549 vuzp1q_s8 (int8x16_t a, int8x16_t b)
17550 {
17551   int8x16_t result;
17552   __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
17553            : "=w"(result)
17554            : "w"(a), "w"(b)
17555            : /* No clobbers */);
17556   return result;
17557 }
17558 
17559 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp1q_s16(int16x8_t a,int16x8_t b)17560 vuzp1q_s16 (int16x8_t a, int16x8_t b)
17561 {
17562   int16x8_t result;
17563   __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
17564            : "=w"(result)
17565            : "w"(a), "w"(b)
17566            : /* No clobbers */);
17567   return result;
17568 }
17569 
17570 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp1q_s32(int32x4_t a,int32x4_t b)17571 vuzp1q_s32 (int32x4_t a, int32x4_t b)
17572 {
17573   int32x4_t result;
17574   __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
17575            : "=w"(result)
17576            : "w"(a), "w"(b)
17577            : /* No clobbers */);
17578   return result;
17579 }
17580 
17581 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp1q_s64(int64x2_t a,int64x2_t b)17582 vuzp1q_s64 (int64x2_t a, int64x2_t b)
17583 {
17584   int64x2_t result;
17585   __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
17586            : "=w"(result)
17587            : "w"(a), "w"(b)
17588            : /* No clobbers */);
17589   return result;
17590 }
17591 
17592 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp1q_u8(uint8x16_t a,uint8x16_t b)17593 vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
17594 {
17595   uint8x16_t result;
17596   __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
17597            : "=w"(result)
17598            : "w"(a), "w"(b)
17599            : /* No clobbers */);
17600   return result;
17601 }
17602 
17603 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp1q_u16(uint16x8_t a,uint16x8_t b)17604 vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
17605 {
17606   uint16x8_t result;
17607   __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
17608            : "=w"(result)
17609            : "w"(a), "w"(b)
17610            : /* No clobbers */);
17611   return result;
17612 }
17613 
17614 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp1q_u32(uint32x4_t a,uint32x4_t b)17615 vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
17616 {
17617   uint32x4_t result;
17618   __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
17619            : "=w"(result)
17620            : "w"(a), "w"(b)
17621            : /* No clobbers */);
17622   return result;
17623 }
17624 
17625 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp1q_u64(uint64x2_t a,uint64x2_t b)17626 vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
17627 {
17628   uint64x2_t result;
17629   __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
17630            : "=w"(result)
17631            : "w"(a), "w"(b)
17632            : /* No clobbers */);
17633   return result;
17634 }
17635 
17636 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp2_f32(float32x2_t a,float32x2_t b)17637 vuzp2_f32 (float32x2_t a, float32x2_t b)
17638 {
17639   float32x2_t result;
17640   __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
17641            : "=w"(result)
17642            : "w"(a), "w"(b)
17643            : /* No clobbers */);
17644   return result;
17645 }
17646 
17647 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp2_p8(poly8x8_t a,poly8x8_t b)17648 vuzp2_p8 (poly8x8_t a, poly8x8_t b)
17649 {
17650   poly8x8_t result;
17651   __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
17652            : "=w"(result)
17653            : "w"(a), "w"(b)
17654            : /* No clobbers */);
17655   return result;
17656 }
17657 
17658 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp2_p16(poly16x4_t a,poly16x4_t b)17659 vuzp2_p16 (poly16x4_t a, poly16x4_t b)
17660 {
17661   poly16x4_t result;
17662   __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
17663            : "=w"(result)
17664            : "w"(a), "w"(b)
17665            : /* No clobbers */);
17666   return result;
17667 }
17668 
17669 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp2_s8(int8x8_t a,int8x8_t b)17670 vuzp2_s8 (int8x8_t a, int8x8_t b)
17671 {
17672   int8x8_t result;
17673   __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
17674            : "=w"(result)
17675            : "w"(a), "w"(b)
17676            : /* No clobbers */);
17677   return result;
17678 }
17679 
17680 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp2_s16(int16x4_t a,int16x4_t b)17681 vuzp2_s16 (int16x4_t a, int16x4_t b)
17682 {
17683   int16x4_t result;
17684   __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
17685            : "=w"(result)
17686            : "w"(a), "w"(b)
17687            : /* No clobbers */);
17688   return result;
17689 }
17690 
17691 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp2_s32(int32x2_t a,int32x2_t b)17692 vuzp2_s32 (int32x2_t a, int32x2_t b)
17693 {
17694   int32x2_t result;
17695   __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
17696            : "=w"(result)
17697            : "w"(a), "w"(b)
17698            : /* No clobbers */);
17699   return result;
17700 }
17701 
17702 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp2_u8(uint8x8_t a,uint8x8_t b)17703 vuzp2_u8 (uint8x8_t a, uint8x8_t b)
17704 {
17705   uint8x8_t result;
17706   __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
17707            : "=w"(result)
17708            : "w"(a), "w"(b)
17709            : /* No clobbers */);
17710   return result;
17711 }
17712 
17713 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp2_u16(uint16x4_t a,uint16x4_t b)17714 vuzp2_u16 (uint16x4_t a, uint16x4_t b)
17715 {
17716   uint16x4_t result;
17717   __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
17718            : "=w"(result)
17719            : "w"(a), "w"(b)
17720            : /* No clobbers */);
17721   return result;
17722 }
17723 
17724 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp2_u32(uint32x2_t a,uint32x2_t b)17725 vuzp2_u32 (uint32x2_t a, uint32x2_t b)
17726 {
17727   uint32x2_t result;
17728   __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
17729            : "=w"(result)
17730            : "w"(a), "w"(b)
17731            : /* No clobbers */);
17732   return result;
17733 }
17734 
17735 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp2q_f32(float32x4_t a,float32x4_t b)17736 vuzp2q_f32 (float32x4_t a, float32x4_t b)
17737 {
17738   float32x4_t result;
17739   __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
17740            : "=w"(result)
17741            : "w"(a), "w"(b)
17742            : /* No clobbers */);
17743   return result;
17744 }
17745 
17746 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp2q_f64(float64x2_t a,float64x2_t b)17747 vuzp2q_f64 (float64x2_t a, float64x2_t b)
17748 {
17749   float64x2_t result;
17750   __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
17751            : "=w"(result)
17752            : "w"(a), "w"(b)
17753            : /* No clobbers */);
17754   return result;
17755 }
17756 
17757 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp2q_p8(poly8x16_t a,poly8x16_t b)17758 vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
17759 {
17760   poly8x16_t result;
17761   __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
17762            : "=w"(result)
17763            : "w"(a), "w"(b)
17764            : /* No clobbers */);
17765   return result;
17766 }
17767 
17768 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp2q_p16(poly16x8_t a,poly16x8_t b)17769 vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
17770 {
17771   poly16x8_t result;
17772   __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
17773            : "=w"(result)
17774            : "w"(a), "w"(b)
17775            : /* No clobbers */);
17776   return result;
17777 }
17778 
17779 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp2q_s8(int8x16_t a,int8x16_t b)17780 vuzp2q_s8 (int8x16_t a, int8x16_t b)
17781 {
17782   int8x16_t result;
17783   __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
17784            : "=w"(result)
17785            : "w"(a), "w"(b)
17786            : /* No clobbers */);
17787   return result;
17788 }
17789 
17790 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp2q_s16(int16x8_t a,int16x8_t b)17791 vuzp2q_s16 (int16x8_t a, int16x8_t b)
17792 {
17793   int16x8_t result;
17794   __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
17795            : "=w"(result)
17796            : "w"(a), "w"(b)
17797            : /* No clobbers */);
17798   return result;
17799 }
17800 
17801 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp2q_s32(int32x4_t a,int32x4_t b)17802 vuzp2q_s32 (int32x4_t a, int32x4_t b)
17803 {
17804   int32x4_t result;
17805   __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
17806            : "=w"(result)
17807            : "w"(a), "w"(b)
17808            : /* No clobbers */);
17809   return result;
17810 }
17811 
17812 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp2q_s64(int64x2_t a,int64x2_t b)17813 vuzp2q_s64 (int64x2_t a, int64x2_t b)
17814 {
17815   int64x2_t result;
17816   __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
17817            : "=w"(result)
17818            : "w"(a), "w"(b)
17819            : /* No clobbers */);
17820   return result;
17821 }
17822 
17823 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp2q_u8(uint8x16_t a,uint8x16_t b)17824 vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
17825 {
17826   uint8x16_t result;
17827   __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
17828            : "=w"(result)
17829            : "w"(a), "w"(b)
17830            : /* No clobbers */);
17831   return result;
17832 }
17833 
17834 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp2q_u16(uint16x8_t a,uint16x8_t b)17835 vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
17836 {
17837   uint16x8_t result;
17838   __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
17839            : "=w"(result)
17840            : "w"(a), "w"(b)
17841            : /* No clobbers */);
17842   return result;
17843 }
17844 
17845 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp2q_u32(uint32x4_t a,uint32x4_t b)17846 vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
17847 {
17848   uint32x4_t result;
17849   __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
17850            : "=w"(result)
17851            : "w"(a), "w"(b)
17852            : /* No clobbers */);
17853   return result;
17854 }
17855 
17856 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp2q_u64(uint64x2_t a,uint64x2_t b)17857 vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
17858 {
17859   uint64x2_t result;
17860   __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
17861            : "=w"(result)
17862            : "w"(a), "w"(b)
17863            : /* No clobbers */);
17864   return result;
17865 }
17866 
17867 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vzip1_f32(float32x2_t a,float32x2_t b)17868 vzip1_f32 (float32x2_t a, float32x2_t b)
17869 {
17870   float32x2_t result;
17871   __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
17872            : "=w"(result)
17873            : "w"(a), "w"(b)
17874            : /* No clobbers */);
17875   return result;
17876 }
17877 
17878 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vzip1_p8(poly8x8_t a,poly8x8_t b)17879 vzip1_p8 (poly8x8_t a, poly8x8_t b)
17880 {
17881   poly8x8_t result;
17882   __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
17883            : "=w"(result)
17884            : "w"(a), "w"(b)
17885            : /* No clobbers */);
17886   return result;
17887 }
17888 
17889 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vzip1_p16(poly16x4_t a,poly16x4_t b)17890 vzip1_p16 (poly16x4_t a, poly16x4_t b)
17891 {
17892   poly16x4_t result;
17893   __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
17894            : "=w"(result)
17895            : "w"(a), "w"(b)
17896            : /* No clobbers */);
17897   return result;
17898 }
17899 
17900 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vzip1_s8(int8x8_t a,int8x8_t b)17901 vzip1_s8 (int8x8_t a, int8x8_t b)
17902 {
17903   int8x8_t result;
17904   __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
17905            : "=w"(result)
17906            : "w"(a), "w"(b)
17907            : /* No clobbers */);
17908   return result;
17909 }
17910 
17911 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vzip1_s16(int16x4_t a,int16x4_t b)17912 vzip1_s16 (int16x4_t a, int16x4_t b)
17913 {
17914   int16x4_t result;
17915   __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
17916            : "=w"(result)
17917            : "w"(a), "w"(b)
17918            : /* No clobbers */);
17919   return result;
17920 }
17921 
17922 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vzip1_s32(int32x2_t a,int32x2_t b)17923 vzip1_s32 (int32x2_t a, int32x2_t b)
17924 {
17925   int32x2_t result;
17926   __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
17927            : "=w"(result)
17928            : "w"(a), "w"(b)
17929            : /* No clobbers */);
17930   return result;
17931 }
17932 
17933 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vzip1_u8(uint8x8_t a,uint8x8_t b)17934 vzip1_u8 (uint8x8_t a, uint8x8_t b)
17935 {
17936   uint8x8_t result;
17937   __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
17938            : "=w"(result)
17939            : "w"(a), "w"(b)
17940            : /* No clobbers */);
17941   return result;
17942 }
17943 
17944 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vzip1_u16(uint16x4_t a,uint16x4_t b)17945 vzip1_u16 (uint16x4_t a, uint16x4_t b)
17946 {
17947   uint16x4_t result;
17948   __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
17949            : "=w"(result)
17950            : "w"(a), "w"(b)
17951            : /* No clobbers */);
17952   return result;
17953 }
17954 
17955 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vzip1_u32(uint32x2_t a,uint32x2_t b)17956 vzip1_u32 (uint32x2_t a, uint32x2_t b)
17957 {
17958   uint32x2_t result;
17959   __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
17960            : "=w"(result)
17961            : "w"(a), "w"(b)
17962            : /* No clobbers */);
17963   return result;
17964 }
17965 
17966 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vzip1q_f32(float32x4_t a,float32x4_t b)17967 vzip1q_f32 (float32x4_t a, float32x4_t b)
17968 {
17969   float32x4_t result;
17970   __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
17971            : "=w"(result)
17972            : "w"(a), "w"(b)
17973            : /* No clobbers */);
17974   return result;
17975 }
17976 
17977 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vzip1q_f64(float64x2_t a,float64x2_t b)17978 vzip1q_f64 (float64x2_t a, float64x2_t b)
17979 {
17980   float64x2_t result;
17981   __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
17982            : "=w"(result)
17983            : "w"(a), "w"(b)
17984            : /* No clobbers */);
17985   return result;
17986 }
17987 
17988 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vzip1q_p8(poly8x16_t a,poly8x16_t b)17989 vzip1q_p8 (poly8x16_t a, poly8x16_t b)
17990 {
17991   poly8x16_t result;
17992   __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
17993            : "=w"(result)
17994            : "w"(a), "w"(b)
17995            : /* No clobbers */);
17996   return result;
17997 }
17998 
17999 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vzip1q_p16(poly16x8_t a,poly16x8_t b)18000 vzip1q_p16 (poly16x8_t a, poly16x8_t b)
18001 {
18002   poly16x8_t result;
18003   __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
18004            : "=w"(result)
18005            : "w"(a), "w"(b)
18006            : /* No clobbers */);
18007   return result;
18008 }
18009 
18010 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vzip1q_s8(int8x16_t a,int8x16_t b)18011 vzip1q_s8 (int8x16_t a, int8x16_t b)
18012 {
18013   int8x16_t result;
18014   __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
18015            : "=w"(result)
18016            : "w"(a), "w"(b)
18017            : /* No clobbers */);
18018   return result;
18019 }
18020 
18021 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vzip1q_s16(int16x8_t a,int16x8_t b)18022 vzip1q_s16 (int16x8_t a, int16x8_t b)
18023 {
18024   int16x8_t result;
18025   __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
18026            : "=w"(result)
18027            : "w"(a), "w"(b)
18028            : /* No clobbers */);
18029   return result;
18030 }
18031 
/* vzip1q_s32: interleave the low-half elements of A and B
   (AArch64 "zip1" instruction; temporary inline asm).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vzip1q_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18042 
/* vzip1q_s64: interleave the low-half elements of A and B
   (AArch64 "zip1" instruction; temporary inline asm).  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vzip1q_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18053 
/* vzip1q_u8: interleave the low-half elements of A and B
   (AArch64 "zip1" instruction; temporary inline asm).  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vzip1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18064 
/* vzip1q_u16: interleave the low-half elements of A and B
   (AArch64 "zip1" instruction; temporary inline asm).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vzip1q_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18075 
/* vzip1q_u32: interleave the low-half elements of A and B
   (AArch64 "zip1" instruction; temporary inline asm).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vzip1q_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18086 
/* vzip1q_u64: interleave the low-half elements of A and B
   (AArch64 "zip1" instruction; temporary inline asm).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vzip1q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18097 
/* vzip2_f32: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vzip2_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18108 
/* vzip2_p8: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vzip2_p8 (poly8x8_t a, poly8x8_t b)
{
  poly8x8_t result;
  __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18119 
/* vzip2_p16: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vzip2_p16 (poly16x4_t a, poly16x4_t b)
{
  poly16x4_t result;
  __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18130 
/* vzip2_s8: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vzip2_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18141 
/* vzip2_s16: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vzip2_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18152 
/* vzip2_s32: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vzip2_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18163 
/* vzip2_u8: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vzip2_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18174 
/* vzip2_u16: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vzip2_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18185 
/* vzip2_u32: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vzip2_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18196 
/* vzip2q_f32: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vzip2q_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18207 
/* vzip2q_f64: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vzip2q_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18218 
/* vzip2q_p8: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vzip2q_p8 (poly8x16_t a, poly8x16_t b)
{
  poly8x16_t result;
  __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18229 
/* vzip2q_p16: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vzip2q_p16 (poly16x8_t a, poly16x8_t b)
{
  poly16x8_t result;
  __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18240 
/* vzip2q_s8: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vzip2q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18251 
/* vzip2q_s16: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vzip2q_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18262 
/* vzip2q_s32: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vzip2q_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18273 
/* vzip2q_s64: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vzip2q_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18284 
/* vzip2q_u8: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vzip2q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18295 
/* vzip2q_u16: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vzip2q_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18306 
/* vzip2q_u32: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vzip2q_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18317 
/* vzip2q_u64: interleave the high-half elements of A and B
   (AArch64 "zip2" instruction; temporary inline asm).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vzip2q_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18328 
18329 /* End of temporary inline asm implementations.  */
18330 
18331 /* Start of temporary inline asm for vldn, vstn and friends.  */
18332 
18333 /* Create struct element types for duplicating loads.
18334 
18335    Create 2 element structures of:
18336 
18337    +------+----+----+----+----+
18338    |      | 8  | 16 | 32 | 64 |
18339    +------+----+----+----+----+
18340    |int   | Y  | Y  | N  | N  |
18341    +------+----+----+----+----+
18342    |uint  | Y  | Y  | N  | N  |
18343    +------+----+----+----+----+
18344    |float | -  | -  | N  | N  |
18345    +------+----+----+----+----+
18346    |poly  | Y  | Y  | -  | -  |
18347    +------+----+----+----+----+
18348 
18349    Create 3 element structures of:
18350 
18351    +------+----+----+----+----+
18352    |      | 8  | 16 | 32 | 64 |
18353    +------+----+----+----+----+
18354    |int   | Y  | Y  | Y  | Y  |
18355    +------+----+----+----+----+
18356    |uint  | Y  | Y  | Y  | Y  |
18357    +------+----+----+----+----+
18358    |float | -  | -  | Y  | Y  |
18359    +------+----+----+----+----+
18360    |poly  | Y  | Y  | -  | -  |
18361    +------+----+----+----+----+
18362 
18363    Create 4 element structures of:
18364 
18365    +------+----+----+----+----+
18366    |      | 8  | 16 | 32 | 64 |
18367    +------+----+----+----+----+
18368    |int   | Y  | N  | N  | Y  |
18369    +------+----+----+----+----+
18370    |uint  | Y  | N  | N  | Y  |
18371    +------+----+----+----+----+
18372    |float | -  | -  | N  | Y  |
18373    +------+----+----+----+----+
18374    |poly  | Y  | N  | -  | -  |
18375    +------+----+----+----+----+
18376 
18377   This is required for casting memory reference.  */
/* __STRUCTN (t, sz, nelem): declare struct t<sz>x<nelem>_t holding an
   array of NELEM t<sz>_t scalars.  These exist only to give the vldN /
   vstN inline asm below a memory operand of exactly the right size;
   combinations marked N/- in the tables above are not declared here
   (presumably because a suitably-sized type already exists -- see the
   vector typedefs earlier in this file).  */
#define __STRUCTN(t, sz, nelem)			\
  typedef struct t ## sz ## x ## nelem ## _t {	\
    t ## sz ## _t val[nelem];			\
  }  t ## sz ## x ## nelem ## _t;

/* 2-element structs.  */
__STRUCTN (int, 8, 2)
__STRUCTN (int, 16, 2)
__STRUCTN (uint, 8, 2)
__STRUCTN (uint, 16, 2)
__STRUCTN (poly, 8, 2)
__STRUCTN (poly, 16, 2)
/* 3-element structs.  */
__STRUCTN (int, 8, 3)
__STRUCTN (int, 16, 3)
__STRUCTN (int, 32, 3)
__STRUCTN (int, 64, 3)
__STRUCTN (uint, 8, 3)
__STRUCTN (uint, 16, 3)
__STRUCTN (uint, 32, 3)
__STRUCTN (uint, 64, 3)
__STRUCTN (float, 32, 3)
__STRUCTN (float, 64, 3)
__STRUCTN (poly, 8, 3)
__STRUCTN (poly, 16, 3)
/* 4-element structs.  */
__STRUCTN (int, 8, 4)
__STRUCTN (int, 64, 4)
__STRUCTN (uint, 8, 4)
__STRUCTN (uint, 64, 4)
__STRUCTN (poly, 8, 4)
__STRUCTN (float, 64, 4)
#undef __STRUCTN
18411 
/* __LD2R_FUNC: define vld2[Q]_dup_<funcsuffix>, which performs an LD2R
   (load single 2-element structure and replicate to all lanes) from PTR
   and returns the resulting two-vector structure RETTYPE.  STRUCTTYPE is
   used only to size the "Q" memory input operand to the two elements
   actually read.  The result is staged through v16/v17 and stored back
   with ST1.  Temporary inline asm until builtins exist.  */
#define __LD2R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__)) 					\
  vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17");					\
    return result;							\
  }

__LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
__LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
__LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
__LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
__LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
__LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
__LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
__LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
__LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
__LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
__LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
__LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
__LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
__LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
__LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
__LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
__LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
__LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
__LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
__LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
__LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
__LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
__LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
__LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
18451 
18452 #define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix,			\
18453 			lnsuffix, funcsuffix, Q)			\
18454   __extension__ static __inline rettype					\
18455   __attribute__ ((__always_inline__))					\
18456   vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
18457 				     rettype b, const int c)		\
18458   {									\
18459     rettype result;							\
18460     __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
18461 	     "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t"	\
18462 	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"	\
18463 	     : "=Q"(result)						\
18464 	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
18465 	     : "memory", "v16", "v17");					\
18466     return result;							\
18467   }
18468 
18469 __LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
18470 __LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
18471 __LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
18472 __LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
18473 __LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
18474 __LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
18475 __LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
18476 __LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
18477 __LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
18478 __LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
18479 __LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
18480 __LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
18481 __LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
18482 __LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
18483 __LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
18484 __LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
18485 __LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
18486 __LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
18487 __LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
18488 __LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
18489 __LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
18490 __LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
18491 __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
18492 __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
18493 
/* __LD3R_FUNC: define vld3[Q]_dup_<funcsuffix>, which performs an LD3R
   (load single 3-element structure and replicate to all lanes) from PTR
   and returns the resulting three-vector structure RETTYPE.  STRUCTTYPE
   sizes the "Q" memory input operand to the three elements actually
   read.  The result is staged through v16-v18 and stored with ST1.
   Temporary inline asm until builtins exist.  */
#define __LD3R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17", "v18");				\
    return result;							\
  }

__LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
__LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
__LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
__LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
__LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
__LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
__LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
__LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
__LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
__LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
__LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
__LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
__LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
__LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
__LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
__LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
__LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
__LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
__LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
__LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
__LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
__LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
__LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
__LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
18533 
18534 #define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix,			\
18535 			lnsuffix, funcsuffix, Q)			\
18536   __extension__ static __inline rettype					\
18537   __attribute__ ((__always_inline__))					\
18538   vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
18539 				     rettype b, const int c)		\
18540   {									\
18541     rettype result;							\
18542     __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
18543 	     "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t"	\
18544 	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t"	\
18545 	     : "=Q"(result)						\
18546 	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
18547 	     : "memory", "v16", "v17", "v18");				\
18548     return result;							\
18549   }
18550 
18551 __LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
18552 __LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
18553 __LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
18554 __LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
18555 __LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
18556 __LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
18557 __LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
18558 __LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
18559 __LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
18560 __LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
18561 __LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
18562 __LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
18563 __LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
18564 __LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
18565 __LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
18566 __LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
18567 __LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
18568 __LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
18569 __LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
18570 __LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
18571 __LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
18572 __LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
18573 __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
18574 __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
18575 
/* __LD4R_FUNC: define vld4[Q]_dup_<funcsuffix>, which performs an LD4R
   (load single 4-element structure and replicate to all lanes) from PTR
   and returns the resulting four-vector structure RETTYPE.  STRUCTTYPE
   sizes the "Q" memory input operand to the four elements actually
   read.  The result is staged through v16-v19 and stored with ST1.
   Temporary inline asm until builtins exist.  */
#define __LD4R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17", "v18", "v19");			\
    return result;							\
  }

__LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
__LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
__LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
__LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
__LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
__LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
__LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
__LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
__LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
__LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
__LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
__LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
__LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
__LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
__LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
__LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
__LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
__LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
__LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
__LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
__LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
__LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
__LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
__LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
18615 
18616 #define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix,			\
18617 			lnsuffix, funcsuffix, Q)			\
18618   __extension__ static __inline rettype					\
18619   __attribute__ ((__always_inline__))					\
18620   vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
18621 				     rettype b, const int c)		\
18622   {									\
18623     rettype result;							\
18624     __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
18625 	     "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t"	\
18626 	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"	\
18627 	     : "=Q"(result)						\
18628 	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
18629 	     : "memory", "v16", "v17", "v18", "v19");			\
18630     return result;							\
18631   }
18632 
18633 __LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
18634 __LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
18635 __LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
18636 __LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
18637 __LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
18638 __LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
18639 __LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
18640 __LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
18641 __LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
18642 __LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
18643 __LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
18644 __LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
18645 __LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
18646 __LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
18647 __LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
18648 __LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
18649 __LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
18650 __LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
18651 __LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
18652 __LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
18653 __LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
18654 __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
18655 __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
18656 __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
18657 
/* __ST2_LANE_FUNC: define vst2[Q]_lane_<funcsuffix>, which stores lane
   C of the two-vector structure B as one 2-element structure at PTR.
   B is staged into v16/v17 with LD1, then ST2 writes the selected lane.
   NOTE(review): PTR is declared const even though it is the store
   destination, and the const is cast away in the output operand; ACLE
   declares vst2_lane with a non-const pointer -- confirm before
   tightening the signature.  Temporary inline asm.  */
#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline void					\
  __attribute__ ((__always_inline__))					\
  vst2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     intype b, const int c)		\
  {									\
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
	     "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t"	\
	     : "=Q"(*(intype *) ptr)					\
	     : "Q"(b), "i"(c)						\
	     : "memory", "v16", "v17");					\
  }

__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
18696 
/* __ST3_LANE_FUNC: define vst3[Q]_lane_<funcsuffix>, which stores lane
   C of the three-vector structure B as one 3-element structure at PTR.
   B is staged into v16-v18 with LD1, then ST3 writes the selected lane.
   NOTE(review): PTR is declared const even though it is the store
   destination, and the const is cast away in the output operand; ACLE
   declares vst3_lane with a non-const pointer -- confirm before
   tightening the signature.  Temporary inline asm.  */
#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline void					\
  __attribute__ ((__always_inline__))					\
  vst3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     intype b, const int c)		\
  {									\
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
	     "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t"	\
	     : "=Q"(*(intype *) ptr)					\
	     : "Q"(b), "i"(c)						\
	     : "memory", "v16", "v17", "v18");				\
  }

__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
18735 
/* __ST4_LANE_FUNC: define vst4[Q]_lane_<funcsuffix>, which stores lane
   C of the four-vector structure B as one 4-element structure at PTR.
   B is staged into v16-v19 with LD1, then ST4 writes the selected lane.
   NOTE(review): PTR is declared const even though it is the store
   destination, and the const is cast away in the output operand; ACLE
   declares vst4_lane with a non-const pointer -- confirm before
   tightening the signature.  Temporary inline asm.  */
#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline void					\
  __attribute__ ((__always_inline__))					\
  vst4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     intype b, const int c)		\
  {									\
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
	     "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t"	\
	     : "=Q"(*(intype *) ptr)					\
	     : "Q"(b), "i"(c)						\
	     : "memory", "v16", "v17", "v18", "v19");			\
  }

__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
18774 
/* Across-vector add reductions on 2x32-bit inputs, via inline asm.  */

/* Add-long across vector: sum the two 32-bit lanes of A into a single
   64-bit scalar using SADDLP (signed pairwise add-long).  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddlv_s32 (int32x2_t a)
{
  int64_t result;
  __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

/* Unsigned variant of vaddlv_s32 (UADDLP).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddlv_u32 (uint32x2_t a)
{
  uint64_t result;
  __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

/* Add across vector: sum the two 32-bit lanes of A with ADDP
   (pairwise add); the 32-bit result is read from the low lane of the
   destination register via the "=w" output.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddv_s32 (int32x2_t a)
{
  int32_t result;
  __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

/* Unsigned variant of vaddv_s32 (same ADDP instruction; lane addition
   is sign-agnostic).  */
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddv_u32 (uint32x2_t a)
{
  uint32_t result;
  __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}
18806 
/* Across-vector FP max/min using the "NM" (IEEE maxNum/minNum NaN
   handling) pairwise instructions; result read from the low lane.  */

/* Max of the two float lanes of A (FMAXNMP).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmv_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fmaxnmp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

/* Min of the two float lanes of A (FMINNMP).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminnmv_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminnmp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

/* Max of the two double lanes of A (FMAXNMP on .2d).  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxnmvq_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fmaxnmp %0.2d, %1.2d, %1.2d" : "=w"(result) : "w"(a) : );
  return result;
}
18830 
/* Across-vector integer max/min on 2x32-bit inputs via the pairwise
   SMAXP/UMAXP/SMINP/UMINP instructions, plus the .2d FMINNMP double
   variant; the scalar result is read from the low lane via "=w".  */

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxv_s32 (int32x2_t a)
{
  int32_t result;
  __asm__ ("smaxp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxv_u32 (uint32x2_t a)
{
  uint32_t result;
  __asm__ ("umaxp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

/* Min of the two double lanes of A (FMINNMP, IEEE minNum NaN rules).  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vminnmvq_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminnmp %0.2d, %1.2d, %1.2d" : "=w"(result) : "w"(a) : );
  return result;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vminv_s32 (int32x2_t a)
{
  int32_t result;
  __asm__ ("sminp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vminv_u32 (uint32x2_t a)
{
  uint32_t result;
  __asm__ ("uminp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
  return result;
}
18870 
18871 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpaddd_s64(int64x2_t __a)18872 vpaddd_s64 (int64x2_t __a)
18873 {
18874   return __builtin_aarch64_addpdi (__a);
18875 }
18876 
18877 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_laneq_s16(int16x4_t __a,int16x8_t __b,const int __c)18878 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
18879 {
18880   return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
18881 }
18882 
18883 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_laneq_s32(int32x2_t __a,int32x4_t __b,const int __c)18884 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
18885 {
18886   return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
18887 }
18888 
18889 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_laneq_s16(int16x8_t __a,int16x8_t __b,const int __c)18890 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
18891 {
18892   return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
18893 }
18894 
18895 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_laneq_s32(int32x4_t __a,int32x4_t __b,const int __c)18896 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
18897 {
18898   return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
18899 }
18900 
18901 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_laneq_s16(int16x4_t __a,int16x8_t __b,const int __c)18902 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
18903 {
18904   return  __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
18905 }
18906 
18907 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_laneq_s32(int32x2_t __a,int32x4_t __b,const int __c)18908 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
18909 {
18910   return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
18911 }
18912 
18913 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_laneq_s16(int16x8_t __a,int16x8_t __b,const int __c)18914 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
18915 {
18916   return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
18917 }
18918 
18919 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_laneq_s32(int32x4_t __a,int32x4_t __b,const int __c)18920 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
18921 {
18922   return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
18923 }
18924 
18925 /* Table intrinsics.  */
18926 
/* Single-table lookup (TBL): each byte of the index vector selects a
   byte of the 16-byte table A; per the A64 TBL definition an
   out-of-range index byte yields 0.  Variants differ only in element
   type and in 8-byte vs 16-byte ("q") index/result width.  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
{
  poly8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl1_s8 (int8x16_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* 128-bit index/result ("q") variants.  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
{
  poly8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl1q_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
18992 
/* Two-table lookup: the struct TAB is passed through memory ("Q"
   constraint), reloaded into the fixed register pair v16-v17 with LD1
   (TBL requires consecutive registers), then TBL indexes the combined
   32-byte table.  v16/v17 are declared as clobbers.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl2_s8 (int8x16x2_t tab, int8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

/* 128-bit index/result ("q") variants.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl2q_s8 (int8x16x2_t tab, int8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}
19064 
/* Three-table lookup: same scheme as vqtbl2_* but with three table
   registers v16-v18 loaded from the struct's memory image.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl3_s8 (int8x16x3_t tab, int8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

/* 128-bit index/result ("q") variants.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl3q_s8 (int8x16x3_t tab, int8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}
19136 
/* Four-table lookup: same scheme as vqtbl2_*/vqtbl3_* but with four
   table registers v16-v19 loaded from the struct's memory image.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl4_s8 (int8x16x4_t tab, int8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}


/* 128-bit index/result ("q") variants.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl4q_s8 (int8x16x4_t tab, int8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}
19209 
19210 
/* Single-table lookup-extension (TBX): like TBL, but lanes whose
   index byte is out of range keep their previous value, so the
   result is seeded from R and updated in place ("+w" constraint).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx1_s8 (int8x8_t r, int8x16_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* 128-bit index/result ("q") variants.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx1q_s8 (int8x16_t r, int8x16_t tab, int8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
           : "+w"(result)
           : "w"(tab), "w"(idx)
           : /* No clobbers */);
  return result;
}
19276 
/* Two-table TBX: table struct reloaded into v16-v17 (see vqtbl2_*),
   result seeded from R and updated in place (TBX keeps lanes with
   out-of-range index bytes unchanged).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}


/* 128-bit index/result ("q") variants.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, int8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}
19349 
19350 
/* Three-table TBX: table struct reloaded into v16-v18, result seeded
   from R and updated in place.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}


/* 128-bit index/result ("q") variants.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, int8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}
19423 
19424 
/* Four-table TBX: table struct reloaded into v16-v19, result seeded
   from R and updated in place.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}


/* 128-bit index/result ("q") variants.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, int8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}
19497 
19498 /* V7 legacy table intrinsics.  */
19499 
/* V7 vtbl1: the 8-byte table is widened to 16 bytes by appending a
   zero half (vcombine with vcreate(0)), then a single A64 TBL is
   used; indexes >= 8 hit the zero half or are out of range, so they
   produce 0, matching the V7 vtbl1 contract.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl1_s8 (int8x8_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}
19535 
/* V7 vtbl2: the two 8-byte tables are concatenated into one 16-byte
   vector with vcombine, so a single-register A64 TBL suffices.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}
19571 
/* V7 vtbl3: the three 8-byte tables are packed into two 16-byte
   vectors (the last half zero-padded via vcreate(0)), the pair is
   reloaded into v16-v17 through memory, and a two-register TBL does
   the lookup; indexes >= 24 land in the zero padding or out of
   range and so yield 0, matching the V7 contract.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}
19616 
/* vtbl4: four-table byte lookup.  Same scheme as vtbl3 but all four
   64-bit tables are packed, so no zero padding is needed; the pair is
   loaded into the fixed consecutive registers v16/v17 for TBL.  TEMP is
   a memory operand ("Q"), hence the "memory" clobber.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

/* As vtbl4_s8 for unsigned table elements.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

/* As vtbl4_s8 for poly8 table elements.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}
19661 
19662 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx1_s8(int8x8_t r,int8x8_t tab,int8x8_t idx)19663 vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx)
19664 {
19665   int8x8_t result;
19666   int8x8_t tmp1;
19667   int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0)));
19668   __asm__ ("movi %0.8b, 8\n\t"
19669 	   "cmhs %0.8b, %3.8b, %0.8b\n\t"
19670 	   "tbl %1.8b, {%2.16b}, %3.8b\n\t"
19671 	   "bsl %0.8b, %4.8b, %1.8b\n\t"
19672            : "+w"(result), "=w"(tmp1)
19673            : "w"(temp), "w"(idx), "w"(r)
19674            : /* No clobbers */);
19675   return result;
19676 }
19677 
19678 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx1_u8(uint8x8_t r,uint8x8_t tab,uint8x8_t idx)19679 vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx)
19680 {
19681   uint8x8_t result;
19682   uint8x8_t tmp1;
19683   uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0)));
19684   __asm__ ("movi %0.8b, 8\n\t"
19685 	   "cmhs %0.8b, %3.8b, %0.8b\n\t"
19686 	   "tbl %1.8b, {%2.16b}, %3.8b\n\t"
19687 	   "bsl %0.8b, %4.8b, %1.8b\n\t"
19688            : "+w"(result), "=w"(tmp1)
19689            : "w"(temp), "w"(idx), "w"(r)
19690            : /* No clobbers */);
19691   return result;
19692 }
19693 
19694 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx1_p8(poly8x8_t r,poly8x8_t tab,uint8x8_t idx)19695 vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx)
19696 {
19697   poly8x8_t result;
19698   poly8x8_t tmp1;
19699   poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0)));
19700   __asm__ ("movi %0.8b, 8\n\t"
19701 	   "cmhs %0.8b, %3.8b, %0.8b\n\t"
19702 	   "tbl %1.8b, {%2.16b}, %3.8b\n\t"
19703 	   "bsl %0.8b, %4.8b, %1.8b\n\t"
19704            : "+w"(result), "=w"(tmp1)
19705            : "w"(temp), "w"(idx), "w"(r)
19706            : /* No clobbers */);
19707   return result;
19708 }
19709 
/* vtbx2: two-table lookup with extension.  TBX (unlike TBL) leaves the
   destination lane unchanged for out-of-range indices, so seeding
   RESULT with R and using a read-write ("+w") operand gives the vtbx
   semantics directly -- no mask/select sequence is needed.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* As vtbx2_s8 for unsigned elements.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* As vtbx2_s8 for poly8 elements.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}
19745 
19746 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx3_s8(int8x8_t r,int8x8x3_t tab,int8x8_t idx)19747 vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx)
19748 {
19749   int8x8_t result;
19750   int8x8_t tmp1;
19751   int8x16x2_t temp;
19752   temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
19753   temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0)));
19754   __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
19755 	   "movi %0.8b, 24\n\t"
19756 	   "cmhs %0.8b, %3.8b, %0.8b\n\t"
19757 	   "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
19758 	   "bsl %0.8b, %4.8b, %1.8b\n\t"
19759            : "+w"(result), "=w"(tmp1)
19760            : "Q"(temp), "w"(idx), "w"(r)
19761            : "v16", "v17", "memory");
19762   return result;
19763 }
19764 
19765 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx3_u8(uint8x8_t r,uint8x8x3_t tab,uint8x8_t idx)19766 vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx)
19767 {
19768   uint8x8_t result;
19769   uint8x8_t tmp1;
19770   uint8x16x2_t temp;
19771   temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
19772   temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0)));
19773   __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
19774 	   "movi %0.8b, 24\n\t"
19775 	   "cmhs %0.8b, %3.8b, %0.8b\n\t"
19776 	   "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
19777 	   "bsl %0.8b, %4.8b, %1.8b\n\t"
19778            : "+w"(result), "=w"(tmp1)
19779            : "Q"(temp), "w"(idx), "w"(r)
19780            : "v16", "v17", "memory");
19781   return result;
19782 }
19783 
19784 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx3_p8(poly8x8_t r,poly8x8x3_t tab,uint8x8_t idx)19785 vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx)
19786 {
19787   poly8x8_t result;
19788   poly8x8_t tmp1;
19789   poly8x16x2_t temp;
19790   temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
19791   temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0)));
19792   __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
19793 	   "movi %0.8b, 24\n\t"
19794 	   "cmhs %0.8b, %3.8b, %0.8b\n\t"
19795 	   "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
19796 	   "bsl %0.8b, %4.8b, %1.8b\n\t"
19797            : "+w"(result), "=w"(tmp1)
19798            : "Q"(temp), "w"(idx), "w"(r)
19799            : "v16", "v17", "memory");
19800   return result;
19801 }
19802 
/* vtbx4: four-table lookup with extension.  All four 64-bit tables are
   packed into a 2x128-bit pair loaded into the fixed registers v16/v17
   (TBX needs consecutively numbered registers).  TBX itself leaves
   out-of-range lanes untouched, so RESULT is seeded with R and passed
   read-write ("+w"); no mask/select is needed.  TEMP is a memory
   operand ("Q"), hence the "memory" clobber.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "+w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

/* As vtbx4_s8 for unsigned elements.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "+w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

/* As vtbx4_s8 for poly8 elements.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "+w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}
19847 
19848 /* End of temporary inline asm.  */
19849 
19850 /* Start of optimal implementations in approved order.  */
19851 
19852 /* vadd */
19853 
19854 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vaddd_s64(int64x1_t __a,int64x1_t __b)19855 vaddd_s64 (int64x1_t __a, int64x1_t __b)
19856 {
19857   return __a + __b;
19858 }
19859 
19860 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vaddd_u64(uint64x1_t __a,uint64x1_t __b)19861 vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
19862 {
19863   return __a + __b;
19864 }
19865 
19866 /* vceq */
19867 
/* vceq: lane-wise equality compare (CMEQ).  Equal lanes become
   all-ones, unequal lanes all-zeros; the result is always an unsigned
   vector of the same lane width.  The builtins are declared on signed
   element types only, so the unsigned and poly variants cast their
   operands and result -- bitwise equality is signedness-neutral.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceq_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceq_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmeqdi ((int64x1_t) __a,
						(int64x1_t) __b);
}

/* 128-bit (Q-register) forms.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
}

/* Scalar (single 64-bit element) forms.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
}

/* Scalar compare against zero.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqzd_s64 (int64x1_t __a)
{
  return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, 0);
}
20003 
20004 /* vcge */
20005 
/* vcge: lane-wise greater-than-or-equal compare.  Signed variants map
   to the CMGE builtins, unsigned variants to CMHS (unsigned
   higher-or-same); matching lanes become all-ones, others all-zeros.
   The unsigned variants cast through the signed-typed builtins.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcge_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcge_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
						(int64x1_t) __b);
}

/* 128-bit (Q-register) forms.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgeq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
}

/* Scalar (single 64-bit element) forms.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcged_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcged_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
						(int64x1_t) __b);
}

/* Scalar compare against zero.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgezd_s64 (int64x1_t __a)
{
  return (uint64x1_t) __builtin_aarch64_cmgedi (__a, 0);
}
20128 
20129 /* vcgt */
20130 
/* vcgt: lane-wise greater-than compare.  Signed variants map to the
   CMGT builtins, unsigned variants to CMHI (unsigned higher); matching
   lanes become all-ones, others all-zeros.  The unsigned variants cast
   through the signed-typed builtins.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgt_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
						(int64x1_t) __b);
}

/* 128-bit (Q-register) forms.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
}

/* Scalar (single 64-bit element) forms.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
						(int64x1_t) __b);
}

/* Scalar compare against zero.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtzd_s64 (int64x1_t __a)
{
  return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, 0);
}
20253 
20254 /* vcle */
20255 
/* vcle: lane-wise less-than-or-equal compare, implemented by swapping
   the operands of the >= builtins (a <= b  <=>  b >= a): CMGE for
   signed, CMHS for unsigned.  The scalar zero compare uses the direct
   CMLE builtin.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __b,
						 (int8x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __b,
						  (int16x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __b,
						  (int32x2_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __b,
						(int64x1_t) __a);
}

/* 128-bit (Q-register) forms.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __b,
						   (int8x16_t) __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __b,
						  (int16x8_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __b,
						  (int32x4_t) __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __b,
						  (int64x2_t) __a);
}

/* Scalar (single 64-bit element) form.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcled_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a);
}

/* Scalar compare against zero (direct CMLE builtin, no swap).  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclezd_s64 (int64x1_t __a)
{
  return (uint64x1_t) __builtin_aarch64_cmledi (__a, 0);
}
20371 
20372 /* vclt */
20373 
/* vclt_s8: lane-wise __a < __b (signed 8x8); true lanes all-ones.
   Implemented as __b > __a via the cmgt builtin.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
}
20379 
/* vclt_s16: lane-wise __a < __b (signed 16x4), as __b > __a (cmgt).  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
}
20385 
/* vclt_s32: lane-wise __a < __b (signed 32x2), as __b > __a (cmgt).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
}
20391 
/* vclt_s64: scalar __a < __b (signed 64-bit), as __b > __a (cmgt).  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a);
}
20397 
/* vclt_u8: lane-wise __a < __b (unsigned 8x8).  Implemented as __b > __a
   via the unsigned cmhi builtin; casts only reinterpret bits.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __b,
						 (int8x8_t) __a);
}
20404 
/* vclt_u16: lane-wise __a < __b (unsigned 16x4), as __b > __a (cmhi).  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __b,
						  (int16x4_t) __a);
}
20411 
/* vclt_u32: lane-wise __a < __b (unsigned 32x2), as __b > __a (cmhi).  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __b,
						  (int32x2_t) __a);
}
20418 
/* vclt_u64: scalar __a < __b (unsigned 64-bit), as __b > __a (cmhi).  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __b,
						(int64x1_t) __a);
}
20425 
/* vcltq_s8: lane-wise __a < __b (signed 8x16), as __b > __a (cmgt).  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
}
20431 
/* vcltq_s16: lane-wise __a < __b (signed 16x8), as __b > __a (cmgt).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
}
20437 
/* vcltq_s32: lane-wise __a < __b (signed 32x4), as __b > __a (cmgt).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
}
20443 
/* vcltq_s64: lane-wise __a < __b (signed 64x2), as __b > __a (cmgt).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
}
20449 
/* vcltq_u8: lane-wise __a < __b (unsigned 8x16), as __b > __a (cmhi);
   casts only reinterpret bits.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __b,
						   (int8x16_t) __a);
}
20456 
/* vcltq_u16: lane-wise __a < __b (unsigned 16x8), as __b > __a (cmhi).  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __b,
						  (int16x8_t) __a);
}
20463 
/* vcltq_u32: lane-wise __a < __b (unsigned 32x4), as __b > __a (cmhi).  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __b,
						  (int32x4_t) __a);
}
20470 
/* vcltq_u64: lane-wise __a < __b (unsigned 64x2), as __b > __a (cmhi).  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __b,
						  (int64x2_t) __a);
}
20477 
/* vcltd_s64: scalar __a < __b (signed 64-bit), as __b > __a (cmgt);
   all-ones when true, zero when false.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a);
}
20483 
/* vcltzd_s64: scalar __a < 0 (signed); all-ones when true, zero when false.
   Uses the dedicated compare-against-zero builtin (cmlt).  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltzd_s64 (int64x1_t __a)
{
  return (uint64x1_t) __builtin_aarch64_cmltdi (__a, 0);
}
20489 
20490 /* vdup */
20491 
/* vdupb_lane_s8: extract lane b of vector a as a scalar.
   b is expected to be a compile-time lane index (note the bare `int const`
   parameter); the builtin handles lane selection.  */
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vdupb_lane_s8 (int8x16_t a, int const b)
{
  return __builtin_aarch64_dup_laneqi (a, b);
}
20497 
/* vdupb_lane_u8: extract lane b of vector a as a scalar (unsigned);
   casts reinterpret to/from the signed types the builtin is declared on.  */
__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vdupb_lane_u8 (uint8x16_t a, int const b)
{
  return (uint8x1_t) __builtin_aarch64_dup_laneqi ((int8x16_t) a, b);
}
20503 
/* vduph_lane_s16: extract 16-bit lane b of vector a as a scalar.  */
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vduph_lane_s16 (int16x8_t a, int const b)
{
  return __builtin_aarch64_dup_lanehi (a, b);
}
20509 
/* vduph_lane_u16: extract 16-bit lane b of vector a as a scalar (unsigned).  */
__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vduph_lane_u16 (uint16x8_t a, int const b)
{
  return (uint16x1_t) __builtin_aarch64_dup_lanehi ((int16x8_t) a, b);
}
20515 
/* vdups_lane_s32: extract 32-bit lane b of vector a as a scalar.  */
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vdups_lane_s32 (int32x4_t a, int const b)
{
  return __builtin_aarch64_dup_lanesi (a, b);
}
20521 
/* vdups_lane_u32: extract 32-bit lane b of vector a as a scalar (unsigned).  */
__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vdups_lane_u32 (uint32x4_t a, int const b)
{
  return (uint32x1_t) __builtin_aarch64_dup_lanesi ((int32x4_t) a, b);
}
20527 
/* vdupd_lane_s64: extract 64-bit lane b of vector a as a scalar.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdupd_lane_s64 (int64x2_t a, int const b)
{
  return __builtin_aarch64_dup_lanedi (a, b);
}
20533 
/* vdupd_lane_u64: extract 64-bit lane b of vector a as a scalar (unsigned).  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdupd_lane_u64 (uint64x2_t a, int const b)
{
  return (uint64x1_t) __builtin_aarch64_dup_lanedi ((int64x2_t) a, b);
}
20539 
20540 /* vldn */
20541 
/* vld2_s64: structure load of 2 x 64-bit elements from __a.
   The ld2 builtin returns an opaque two-register tuple (simd_oi); each
   D-register is then extracted into the corresponding ret.val[] slot.  */
__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
vld2_s64 (const int64_t * __a)
{
  int64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}
20552 
/* vld2_u64: as vld2_s64 but for unsigned elements; shares the same DImode
   builtin, casting results back to uint64x1_t.  */
__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
vld2_u64 (const uint64_t * __a)
{
  uint64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
  return ret;
}
20563 
/* vld2_f64: structure load of 2 x double from __a via the DFmode ld2
   builtin; unpacks the two-register tuple into ret.val[0..1].  */
__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
vld2_f64 (const float64_t * __a)
{
  float64x1x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
  return ret;
}
20574 
/* vld2_s8: de-interleaving structure load (LD2) of 8-bit lanes into two
   int8x8_t vectors, unpacked from the builtin's two-register tuple.  */
__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vld2_s8 (const int8_t * __a)
{
  int8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}
20585 
/* vld2_p8: polynomial variant of vld2_s8; same v8qi builtin, results cast
   to poly8x8_t.  */
__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vld2_p8 (const poly8_t * __a)
{
  poly8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}
20596 
/* vld2_s16: de-interleaving structure load (LD2) of 16-bit lanes into two
   int16x4_t vectors.  */
__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vld2_s16 (const int16_t * __a)
{
  int16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}
20607 
/* vld2_p16: polynomial variant of vld2_s16; same v4hi builtin, results
   cast to poly16x4_t.  */
__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vld2_p16 (const poly16_t * __a)
{
  poly16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}
20618 
/* vld2_s32: de-interleaving structure load (LD2) of 32-bit lanes into two
   int32x2_t vectors.  */
__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vld2_s32 (const int32_t * __a)
{
  int32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
  return ret;
}
20629 
/* vld2_u8: unsigned variant of vld2_s8; same v8qi builtin, results cast
   to uint8x8_t.  */
__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vld2_u8 (const uint8_t * __a)
{
  uint8x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
  return ret;
}
20640 
/* vld2_u16: unsigned variant of vld2_s16; same v4hi builtin, results cast
   to uint16x4_t.  */
__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vld2_u16 (const uint16_t * __a)
{
  uint16x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
  return ret;
}
20651 
/* vld2_u32: unsigned variant of vld2_s32; same v2si builtin, results cast
   to uint32x2_t.  */
__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vld2_u32 (const uint32_t * __a)
{
  uint32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
  return ret;
}
20662 
/* vld2_f32: de-interleaving structure load (LD2) of single-precision lanes
   into two float32x2_t vectors.  */
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32 (const float32_t * __a)
{
  float32x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
  return ret;
}
20673 
/* vld2q_s8: Q-register form of vld2_s8; extracts full Q registers from the
   two-register tuple (get_qreg instead of get_dreg).  */
__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vld2q_s8 (const int8_t * __a)
{
  int8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}
20684 
/* vld2q_p8: polynomial variant of vld2q_s8; results cast to poly8x16_t.  */
__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vld2q_p8 (const poly8_t * __a)
{
  poly8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}
20695 
/* vld2q_s16: Q-register de-interleaving load of 16-bit lanes (LD2).  */
__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vld2q_s16 (const int16_t * __a)
{
  int16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}
20706 
/* vld2q_p16: polynomial variant of vld2q_s16; results cast to poly16x8_t.  */
__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vld2q_p16 (const poly16_t * __a)
{
  poly16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}
20717 
/* vld2q_s32: Q-register de-interleaving load of 32-bit lanes (LD2).  */
__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vld2q_s32 (const int32_t * __a)
{
  int32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
  return ret;
}
20728 
/* vld2q_s64: Q-register de-interleaving load of 64-bit lanes (LD2).  */
__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
vld2q_s64 (const int64_t * __a)
{
  int64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
  return ret;
}
20739 
/* vld2q_u8: unsigned variant of vld2q_s8; results cast to uint8x16_t.  */
__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vld2q_u8 (const uint8_t * __a)
{
  uint8x16x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
  return ret;
}
20750 
/* vld2q_u16: unsigned variant of vld2q_s16; results cast to uint16x8_t.  */
__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vld2q_u16 (const uint16_t * __a)
{
  uint16x8x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
  return ret;
}
20761 
/* vld2q_u32: unsigned variant of vld2q_s32; results cast to uint32x4_t.  */
__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vld2q_u32 (const uint32_t * __a)
{
  uint32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
  return ret;
}
20772 
/* vld2q_u64: unsigned variant of vld2q_s64; results cast to uint64x2_t.  */
__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
vld2q_u64 (const uint64_t * __a)
{
  uint64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
  return ret;
}
20783 
/* vld2q_f32: Q-register de-interleaving load of single-precision lanes.  */
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32 (const float32_t * __a)
{
  float32x4x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
  return ret;
}
20794 
/* vld2q_f64: Q-register de-interleaving load of double-precision lanes.  */
__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
vld2q_f64 (const float64_t * __a)
{
  float64x2x2_t ret;
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
  return ret;
}
20805 
/* vld3_s64: structure load of 3 x 64-bit elements from __a.
   The ld3 builtin returns an opaque three-register tuple (simd_ci); each
   D-register is extracted into the corresponding ret.val[] slot.  */
__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
vld3_s64 (const int64_t * __a)
{
  int64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}
20817 
/* vld3_u64: unsigned variant of vld3_s64; same DImode builtin, results
   cast to uint64x1_t.  */
__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
vld3_u64 (const uint64_t * __a)
{
  uint64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}
20829 
/* vld3_f64: structure load of 3 x double via the DFmode ld3 builtin.  */
__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
vld3_f64 (const float64_t * __a)
{
  float64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
  ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
  return ret;
}
20841 
/* vld3_s8: de-interleaving structure load (LD3) of 8-bit lanes into three
   int8x8_t vectors.  */
__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
vld3_s8 (const int8_t * __a)
{
  int8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}
20853 
/* vld3_p8: polynomial variant of vld3_s8; results cast to poly8x8_t.  */
__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
vld3_p8 (const poly8_t * __a)
{
  poly8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}
20865 
/* vld3_s16: de-interleaving structure load (LD3) of 16-bit lanes.  */
__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
vld3_s16 (const int16_t * __a)
{
  int16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}
20877 
/* vld3_p16: polynomial variant of vld3_s16; results cast to poly16x4_t.  */
__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
vld3_p16 (const poly16_t * __a)
{
  poly16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}
20889 
/* vld3_s32: de-interleaving structure load (LD3) of 32-bit lanes.  */
__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
vld3_s32 (const int32_t * __a)
{
  int32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
  return ret;
}
20901 
/* vld3_u8: unsigned variant of vld3_s8; results cast to uint8x8_t.  */
__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
vld3_u8 (const uint8_t * __a)
{
  uint8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}
20913 
/* vld3_u16: unsigned variant of vld3_s16; results cast to uint16x4_t.  */
__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
vld3_u16 (const uint16_t * __a)
{
  uint16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}
20925 
/* vld3_u32: unsigned variant of vld3_s32; results cast to uint32x2_t.  */
__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
vld3_u32 (const uint32_t * __a)
{
  uint32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
  return ret;
}
20937 
/* vld3_f32: de-interleaving structure load (LD3) of single-precision
   lanes.  */
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_f32 (const float32_t * __a)
{
  float32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
  return ret;
}
20949 
/* vld3q_s8: Q-register form of vld3_s8; extracts full Q registers from the
   three-register tuple (get_qreg instead of get_dreg).  */
__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
vld3q_s8 (const int8_t * __a)
{
  int8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}
20961 
/* vld3q_p8: polynomial variant of vld3q_s8; results cast to poly8x16_t.  */
__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
vld3q_p8 (const poly8_t * __a)
{
  poly8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}
20973 
/* vld3q_s16: Q-register de-interleaving load of 16-bit lanes (LD3).  */
__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
vld3q_s16 (const int16_t * __a)
{
  int16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}
20985 
/* vld3q_p16: polynomial variant of vld3q_s16; results cast to poly16x8_t.  */
__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
vld3q_p16 (const poly16_t * __a)
{
  poly16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}
20997 
/* vld3q_s32: Q-register de-interleaving load of 32-bit lanes (LD3).  */
__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
vld3q_s32 (const int32_t * __a)
{
  int32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}
21009 
/* vld3q_s64: Q-register de-interleaving load of 64-bit lanes (LD3).  */
__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
vld3q_s64 (const int64_t * __a)
{
  int64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}
21021 
/* vld3q_u8: unsigned variant of vld3q_s8; results cast to uint8x16_t.  */
__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
vld3q_u8 (const uint8_t * __a)
{
  uint8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}
21033 
/* vld3q_u16: unsigned variant of vld3q_s16; results cast to uint16x8_t.  */
__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
vld3q_u16 (const uint16_t * __a)
{
  uint16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}
21045 
/* vld3q_u32: unsigned variant of vld3q_s32; results cast to uint32x4_t.  */
__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
vld3q_u32 (const uint32_t * __a)
{
  uint32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}
21057 
/* vld3q_u64: unsigned variant of vld3q_s64; results cast to uint64x2_t.  */
__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
vld3q_u64 (const uint64_t * __a)
{
  uint64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}
21069 
/* vld3q_f32: Q-register de-interleaving load of single-precision lanes.  */
__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
  float32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
  return ret;
}
21081 
21082 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
vld3q_f64(const float64_t * __a)21083 vld3q_f64 (const float64_t * __a)
21084 {
21085   float64x2x3_t ret;
21086   __builtin_aarch64_simd_ci __o;
21087   __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
21088   ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
21089   ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
21090   ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
21091   return ret;
21092 }
21093 
21094 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
vld4_s64(const int64_t * __a)21095 vld4_s64 (const int64_t * __a)
21096 {
21097   int64x1x4_t ret;
21098   __builtin_aarch64_simd_xi __o;
21099   __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
21100   ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
21101   ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
21102   ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
21103   ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
21104   return ret;
21105 }
21106 
21107 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
vld4_u64(const uint64_t * __a)21108 vld4_u64 (const uint64_t * __a)
21109 {
21110   uint64x1x4_t ret;
21111   __builtin_aarch64_simd_xi __o;
21112   __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
21113   ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
21114   ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
21115   ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
21116   ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
21117   return ret;
21118 }
21119 
21120 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
vld4_f64(const float64_t * __a)21121 vld4_f64 (const float64_t * __a)
21122 {
21123   float64x1x4_t ret;
21124   __builtin_aarch64_simd_xi __o;
21125   __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
21126   ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
21127   ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
21128   ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
21129   ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
21130   return ret;
21131 }
21132 
21133 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
vld4_s8(const int8_t * __a)21134 vld4_s8 (const int8_t * __a)
21135 {
21136   int8x8x4_t ret;
21137   __builtin_aarch64_simd_xi __o;
21138   __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
21139   ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
21140   ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
21141   ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
21142   ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
21143   return ret;
21144 }
21145 
21146 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
vld4_p8(const poly8_t * __a)21147 vld4_p8 (const poly8_t * __a)
21148 {
21149   poly8x8x4_t ret;
21150   __builtin_aarch64_simd_xi __o;
21151   __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
21152   ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
21153   ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
21154   ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
21155   ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
21156   return ret;
21157 }
21158 
21159 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
vld4_s16(const int16_t * __a)21160 vld4_s16 (const int16_t * __a)
21161 {
21162   int16x4x4_t ret;
21163   __builtin_aarch64_simd_xi __o;
21164   __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
21165   ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
21166   ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
21167   ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
21168   ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
21169   return ret;
21170 }
21171 
21172 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
vld4_p16(const poly16_t * __a)21173 vld4_p16 (const poly16_t * __a)
21174 {
21175   poly16x4x4_t ret;
21176   __builtin_aarch64_simd_xi __o;
21177   __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
21178   ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
21179   ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
21180   ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
21181   ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
21182   return ret;
21183 }
21184 
21185 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
vld4_s32(const int32_t * __a)21186 vld4_s32 (const int32_t * __a)
21187 {
21188   int32x2x4_t ret;
21189   __builtin_aarch64_simd_xi __o;
21190   __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
21191   ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
21192   ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
21193   ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
21194   ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
21195   return ret;
21196 }
21197 
21198 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
vld4_u8(const uint8_t * __a)21199 vld4_u8 (const uint8_t * __a)
21200 {
21201   uint8x8x4_t ret;
21202   __builtin_aarch64_simd_xi __o;
21203   __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
21204   ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
21205   ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
21206   ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
21207   ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
21208   return ret;
21209 }
21210 
21211 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
vld4_u16(const uint16_t * __a)21212 vld4_u16 (const uint16_t * __a)
21213 {
21214   uint16x4x4_t ret;
21215   __builtin_aarch64_simd_xi __o;
21216   __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
21217   ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
21218   ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
21219   ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
21220   ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
21221   return ret;
21222 }
21223 
21224 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
vld4_u32(const uint32_t * __a)21225 vld4_u32 (const uint32_t * __a)
21226 {
21227   uint32x2x4_t ret;
21228   __builtin_aarch64_simd_xi __o;
21229   __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
21230   ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
21231   ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
21232   ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
21233   ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
21234   return ret;
21235 }
21236 
21237 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_f32(const float32_t * __a)21238 vld4_f32 (const float32_t * __a)
21239 {
21240   float32x2x4_t ret;
21241   __builtin_aarch64_simd_xi __o;
21242   __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
21243   ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
21244   ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
21245   ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
21246   ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
21247   return ret;
21248 }
21249 
21250 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
vld4q_s8(const int8_t * __a)21251 vld4q_s8 (const int8_t * __a)
21252 {
21253   int8x16x4_t ret;
21254   __builtin_aarch64_simd_xi __o;
21255   __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
21256   ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
21257   ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
21258   ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
21259   ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
21260   return ret;
21261 }
21262 
21263 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
vld4q_p8(const poly8_t * __a)21264 vld4q_p8 (const poly8_t * __a)
21265 {
21266   poly8x16x4_t ret;
21267   __builtin_aarch64_simd_xi __o;
21268   __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
21269   ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
21270   ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
21271   ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
21272   ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
21273   return ret;
21274 }
21275 
21276 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
vld4q_s16(const int16_t * __a)21277 vld4q_s16 (const int16_t * __a)
21278 {
21279   int16x8x4_t ret;
21280   __builtin_aarch64_simd_xi __o;
21281   __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
21282   ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
21283   ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
21284   ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
21285   ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
21286   return ret;
21287 }
21288 
21289 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
vld4q_p16(const poly16_t * __a)21290 vld4q_p16 (const poly16_t * __a)
21291 {
21292   poly16x8x4_t ret;
21293   __builtin_aarch64_simd_xi __o;
21294   __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
21295   ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
21296   ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
21297   ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
21298   ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
21299   return ret;
21300 }
21301 
21302 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
vld4q_s32(const int32_t * __a)21303 vld4q_s32 (const int32_t * __a)
21304 {
21305   int32x4x4_t ret;
21306   __builtin_aarch64_simd_xi __o;
21307   __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
21308   ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
21309   ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
21310   ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
21311   ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
21312   return ret;
21313 }
21314 
21315 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
vld4q_s64(const int64_t * __a)21316 vld4q_s64 (const int64_t * __a)
21317 {
21318   int64x2x4_t ret;
21319   __builtin_aarch64_simd_xi __o;
21320   __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
21321   ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
21322   ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
21323   ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
21324   ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
21325   return ret;
21326 }
21327 
21328 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
vld4q_u8(const uint8_t * __a)21329 vld4q_u8 (const uint8_t * __a)
21330 {
21331   uint8x16x4_t ret;
21332   __builtin_aarch64_simd_xi __o;
21333   __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
21334   ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
21335   ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
21336   ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
21337   ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
21338   return ret;
21339 }
21340 
21341 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
vld4q_u16(const uint16_t * __a)21342 vld4q_u16 (const uint16_t * __a)
21343 {
21344   uint16x8x4_t ret;
21345   __builtin_aarch64_simd_xi __o;
21346   __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
21347   ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
21348   ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
21349   ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
21350   ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
21351   return ret;
21352 }
21353 
21354 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
vld4q_u32(const uint32_t * __a)21355 vld4q_u32 (const uint32_t * __a)
21356 {
21357   uint32x4x4_t ret;
21358   __builtin_aarch64_simd_xi __o;
21359   __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
21360   ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
21361   ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
21362   ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
21363   ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
21364   return ret;
21365 }
21366 
21367 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
vld4q_u64(const uint64_t * __a)21368 vld4q_u64 (const uint64_t * __a)
21369 {
21370   uint64x2x4_t ret;
21371   __builtin_aarch64_simd_xi __o;
21372   __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
21373   ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
21374   ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
21375   ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
21376   ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
21377   return ret;
21378 }
21379 
21380 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_f32(const float32_t * __a)21381 vld4q_f32 (const float32_t * __a)
21382 {
21383   float32x4x4_t ret;
21384   __builtin_aarch64_simd_xi __o;
21385   __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
21386   ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
21387   ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
21388   ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
21389   ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
21390   return ret;
21391 }
21392 
21393 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
vld4q_f64(const float64_t * __a)21394 vld4q_f64 (const float64_t * __a)
21395 {
21396   float64x2x4_t ret;
21397   __builtin_aarch64_simd_xi __o;
21398   __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
21399   ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
21400   ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
21401   ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
21402   ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
21403   return ret;
21404 }
21405 
21406 /* vmax */
21407 
21408 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmax_f32(float32x2_t __a,float32x2_t __b)21409 vmax_f32 (float32x2_t __a, float32x2_t __b)
21410 {
21411   return __builtin_aarch64_fmaxv2sf (__a, __b);
21412 }
21413 
21414 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmax_s8(int8x8_t __a,int8x8_t __b)21415 vmax_s8 (int8x8_t __a, int8x8_t __b)
21416 {
21417   return __builtin_aarch64_smaxv8qi (__a, __b);
21418 }
21419 
21420 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmax_s16(int16x4_t __a,int16x4_t __b)21421 vmax_s16 (int16x4_t __a, int16x4_t __b)
21422 {
21423   return __builtin_aarch64_smaxv4hi (__a, __b);
21424 }
21425 
21426 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmax_s32(int32x2_t __a,int32x2_t __b)21427 vmax_s32 (int32x2_t __a, int32x2_t __b)
21428 {
21429   return __builtin_aarch64_smaxv2si (__a, __b);
21430 }
21431 
21432 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmax_u8(uint8x8_t __a,uint8x8_t __b)21433 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
21434 {
21435   return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
21436 						 (int8x8_t) __b);
21437 }
21438 
21439 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmax_u16(uint16x4_t __a,uint16x4_t __b)21440 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
21441 {
21442   return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
21443 						  (int16x4_t) __b);
21444 }
21445 
21446 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmax_u32(uint32x2_t __a,uint32x2_t __b)21447 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
21448 {
21449   return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
21450 						  (int32x2_t) __b);
21451 }
21452 
21453 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxq_f32(float32x4_t __a,float32x4_t __b)21454 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
21455 {
21456   return __builtin_aarch64_fmaxv4sf (__a, __b);
21457 }
21458 
21459 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmaxq_f64(float64x2_t __a,float64x2_t __b)21460 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
21461 {
21462   return __builtin_aarch64_fmaxv2df (__a, __b);
21463 }
21464 
21465 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmaxq_s8(int8x16_t __a,int8x16_t __b)21466 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
21467 {
21468   return __builtin_aarch64_smaxv16qi (__a, __b);
21469 }
21470 
21471 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmaxq_s16(int16x8_t __a,int16x8_t __b)21472 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
21473 {
21474   return __builtin_aarch64_smaxv8hi (__a, __b);
21475 }
21476 
21477 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmaxq_s32(int32x4_t __a,int32x4_t __b)21478 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
21479 {
21480   return __builtin_aarch64_smaxv4si (__a, __b);
21481 }
21482 
21483 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmaxq_u8(uint8x16_t __a,uint8x16_t __b)21484 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
21485 {
21486   return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
21487 						   (int8x16_t) __b);
21488 }
21489 
21490 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmaxq_u16(uint16x8_t __a,uint16x8_t __b)21491 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
21492 {
21493   return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
21494 						  (int16x8_t) __b);
21495 }
21496 
21497 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmaxq_u32(uint32x4_t __a,uint32x4_t __b)21498 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
21499 {
21500   return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
21501 						  (int32x4_t) __b);
21502 }
21503 
21504 /* vmin */
21505 
21506 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmin_f32(float32x2_t __a,float32x2_t __b)21507 vmin_f32 (float32x2_t __a, float32x2_t __b)
21508 {
21509   return __builtin_aarch64_fminv2sf (__a, __b);
21510 }
21511 
21512 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmin_s8(int8x8_t __a,int8x8_t __b)21513 vmin_s8 (int8x8_t __a, int8x8_t __b)
21514 {
21515   return __builtin_aarch64_sminv8qi (__a, __b);
21516 }
21517 
21518 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmin_s16(int16x4_t __a,int16x4_t __b)21519 vmin_s16 (int16x4_t __a, int16x4_t __b)
21520 {
21521   return __builtin_aarch64_sminv4hi (__a, __b);
21522 }
21523 
21524 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmin_s32(int32x2_t __a,int32x2_t __b)21525 vmin_s32 (int32x2_t __a, int32x2_t __b)
21526 {
21527   return __builtin_aarch64_sminv2si (__a, __b);
21528 }
21529 
21530 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmin_u8(uint8x8_t __a,uint8x8_t __b)21531 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
21532 {
21533   return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
21534 						 (int8x8_t) __b);
21535 }
21536 
21537 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmin_u16(uint16x4_t __a,uint16x4_t __b)21538 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
21539 {
21540   return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
21541 						  (int16x4_t) __b);
21542 }
21543 
21544 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmin_u32(uint32x2_t __a,uint32x2_t __b)21545 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
21546 {
21547   return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
21548 						  (int32x2_t) __b);
21549 }
21550 
21551 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vminq_f32(float32x4_t __a,float32x4_t __b)21552 vminq_f32 (float32x4_t __a, float32x4_t __b)
21553 {
21554   return __builtin_aarch64_fminv4sf (__a, __b);
21555 }
21556 
21557 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vminq_f64(float64x2_t __a,float64x2_t __b)21558 vminq_f64 (float64x2_t __a, float64x2_t __b)
21559 {
21560   return __builtin_aarch64_fminv2df (__a, __b);
21561 }
21562 
21563 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vminq_s8(int8x16_t __a,int8x16_t __b)21564 vminq_s8 (int8x16_t __a, int8x16_t __b)
21565 {
21566   return __builtin_aarch64_sminv16qi (__a, __b);
21567 }
21568 
21569 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vminq_s16(int16x8_t __a,int16x8_t __b)21570 vminq_s16 (int16x8_t __a, int16x8_t __b)
21571 {
21572   return __builtin_aarch64_sminv8hi (__a, __b);
21573 }
21574 
21575 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vminq_s32(int32x4_t __a,int32x4_t __b)21576 vminq_s32 (int32x4_t __a, int32x4_t __b)
21577 {
21578   return __builtin_aarch64_sminv4si (__a, __b);
21579 }
21580 
21581 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vminq_u8(uint8x16_t __a,uint8x16_t __b)21582 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
21583 {
21584   return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
21585 						   (int8x16_t) __b);
21586 }
21587 
21588 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vminq_u16(uint16x8_t __a,uint16x8_t __b)21589 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
21590 {
21591   return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
21592 						  (int16x8_t) __b);
21593 }
21594 
21595 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vminq_u32(uint32x4_t __a,uint32x4_t __b)21596 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
21597 {
21598   return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
21599 						  (int32x4_t) __b);
21600 }
21601 
21602 /* vmla */
21603 
21604 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_f32(float32x2_t a,float32x2_t b,float32x2_t c)21605 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
21606 {
21607   return a + b * c;
21608 }
21609 
21610 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_f32(float32x4_t a,float32x4_t b,float32x4_t c)21611 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
21612 {
21613   return a + b * c;
21614 }
21615 
21616 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmlaq_f64(float64x2_t a,float64x2_t b,float64x2_t c)21617 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
21618 {
21619   return a + b * c;
21620 }
21621 
21622 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmls_f32(float32x2_t a,float32x2_t b,float32x2_t c)21623 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
21624 {
21625   return a - b * c;
21626 }
21627 
21628 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_f32(float32x4_t a,float32x4_t b,float32x4_t c)21629 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
21630 {
21631   return a - b * c;
21632 }
21633 
21634 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmlsq_f64(float64x2_t a,float64x2_t b,float64x2_t c)21635 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
21636 {
21637   return a - b * c;
21638 }
21639 
21640 /* vqabs */
21641 
21642 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqabsq_s64(int64x2_t __a)21643 vqabsq_s64 (int64x2_t __a)
21644 {
21645   return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
21646 }
21647 
21648 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqabsb_s8(int8x1_t __a)21649 vqabsb_s8 (int8x1_t __a)
21650 {
21651   return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
21652 }
21653 
21654 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqabsh_s16(int16x1_t __a)21655 vqabsh_s16 (int16x1_t __a)
21656 {
21657   return (int16x1_t) __builtin_aarch64_sqabshi (__a);
21658 }
21659 
21660 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqabss_s32(int32x1_t __a)21661 vqabss_s32 (int32x1_t __a)
21662 {
21663   return (int32x1_t) __builtin_aarch64_sqabssi (__a);
21664 }
21665 
21666 /* vqadd */
21667 
21668 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqaddb_s8(int8x1_t __a,int8x1_t __b)21669 vqaddb_s8 (int8x1_t __a, int8x1_t __b)
21670 {
21671   return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
21672 }
21673 
21674 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqaddh_s16(int16x1_t __a,int16x1_t __b)21675 vqaddh_s16 (int16x1_t __a, int16x1_t __b)
21676 {
21677   return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
21678 }
21679 
21680 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqadds_s32(int32x1_t __a,int32x1_t __b)21681 vqadds_s32 (int32x1_t __a, int32x1_t __b)
21682 {
21683   return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
21684 }
21685 
21686 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqaddd_s64(int64x1_t __a,int64x1_t __b)21687 vqaddd_s64 (int64x1_t __a, int64x1_t __b)
21688 {
21689   return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
21690 }
21691 
21692 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqaddb_u8(uint8x1_t __a,uint8x1_t __b)21693 vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
21694 {
21695   return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b);
21696 }
21697 
21698 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqaddh_u16(uint16x1_t __a,uint16x1_t __b)21699 vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
21700 {
21701   return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b);
21702 }
21703 
21704 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqadds_u32(uint32x1_t __a,uint32x1_t __b)21705 vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
21706 {
21707   return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b);
21708 }
21709 
21710 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqaddd_u64(uint64x1_t __a,uint64x1_t __b)21711 vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
21712 {
21713   return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
21714 }
21715 
21716 /* vqdmlal */
21717 
/* vqdmlal_s16: signed saturating doubling multiply-accumulate long
   (sqdmlal builtin): widening products of __b and __c accumulated
   into __a.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
}

/* High-half variant: the "2" builtin suffix pairs with the _high_
   intrinsic name.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
}

/* High-half variant multiplying by lane __d of __c.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
}

/* High-half variant, lane taken from a 128-bit __c (laneq builtin).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
}

/* High-half variant multiplying by the scalar __c (_n builtin).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
}

/* Lane variant.  __c is widened to 128 bits (upper half zeroed via
   vcombine) before calling the lane builtin.
   NOTE(review): lane-range checking therefore follows the widened v8hi
   operand — confirm the accepted range for __d matches the 4-element
   __c the user passes.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0)));
  return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d);
}

/* Lane variant with a 128-bit __c (laneq builtin).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
}

/* Scalar-multiplier variant (_n builtin).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
}

/* 32-bit element counterparts of the vqdmlal_*_s16 family above;
   structure is identical, accumulating into 64-bit lanes.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
}

/* See the NOTE(review) on vqdmlal_lane_s16: same widening pattern.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0)));
  return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
}
21813 
21814 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_n_s32(int64x2_t __a,int32x2_t __b,int32_t __c)21815 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21816 {
21817   return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
21818 }
21819 
21820 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlalh_s16(int32x1_t __a,int16x1_t __b,int16x1_t __c)21821 vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21822 {
21823   return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
21824 }
21825 
21826 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlalh_lane_s16(int32x1_t __a,int16x1_t __b,int16x8_t __c,const int __d)21827 vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21828 {
21829   return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
21830 }
21831 
21832 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlals_s32(int64x1_t __a,int32x1_t __b,int32x1_t __c)21833 vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21834 {
21835   return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
21836 }
21837 
21838 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlals_lane_s32(int64x1_t __a,int32x1_t __b,int32x4_t __c,const int __d)21839 vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21840 {
21841   return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
21842 }
21843 
21844 /* vqdmlsl */
21845 
/* Saturating doubling multiply-subtract long: __a - 2 * (__b * __c),
   widening 16-bit lanes to 32-bit (SQDMLSL).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
}

/* As above, but on the high halves of __b and __c (SQDMLSL2).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
}

/* SQDMLSL2 by lane __d of __c.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
}

/* SQDMLSL2 by lane __d of the 128-bit vector __c.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
}

/* SQDMLSL2 (high half of __b) by the replicated scalar __c.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
}

/* SQDMLSL by lane __d of __c, zero-extending __c to 128 bits because the
   lane builtin expects a 128-bit lane source.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
  int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0)));
  return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d);
}

/* SQDMLSL by lane __d of the 128-bit vector __c.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
}

/* SQDMLSL by the replicated scalar __c.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
}

/* SQDMLSL widening 32-bit lanes to 64-bit.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
}

/* SQDMLSL2 on the high halves of __b and __c.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
}

/* SQDMLSL2 by lane __d of __c.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
		       int const __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
}

/* SQDMLSL2 by lane __d of the 128-bit vector __c.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
			int const __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
}

/* SQDMLSL2 (high half of __b) by the replicated scalar __c.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
}

/* SQDMLSL by lane __d of __c, zero-extended to 128 bits as above.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
  int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0)));
  return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d);
}

/* SQDMLSL by lane __d of the 128-bit vector __c.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
}

/* SQDMLSL by the replicated scalar __c.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
}

/* Scalar SQDMLSL: 16-bit operands, 32-bit accumulator.  */
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
{
  return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
}

/* Scalar SQDMLSL by lane __d of __c.
   NOTE(review): __c is int16x8_t here; later ACLE revisions declare this
   intrinsic with an int16x4_t lane source — confirm before changing.  */
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}

/* Scalar SQDMLSL: 32-bit operands, 64-bit accumulator.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
{
  return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
}

/* Scalar SQDMLSL by lane __d of __c.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
}
21971 
21972 /* vqdmulh */
21973 
/* Saturating doubling multiply returning high half (SQDMULH),
   __a by lane __c of __b.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
}

/* SQDMULH by lane __c of __b, 32-bit lanes.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
}

/* SQDMULH by lane __c of __b, 128-bit first operand.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
}

/* SQDMULH by lane __c of __b, 128-bit first operand, 32-bit lanes.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
}

/* Scalar SQDMULH, 16-bit.  */
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
}

/* Scalar SQDMULH by lane __c of __b.  */
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}

/* Scalar SQDMULH, 32-bit.  */
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
}

/* Scalar SQDMULH by lane __c of __b.  */
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}
22021 
22022 /* vqdmull */
22023 
/* Saturating doubling multiply long: 2 * (__a * __b), widening 16-bit
   lanes to 32-bit (SQDMULL).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqdmullv4hi (__a, __b);
}

/* As above, on the high halves of __a and __b (SQDMULL2).  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqdmull2v8hi (__a, __b);
}

/* SQDMULL2 by lane __c of __b.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
}

/* SQDMULL2 by lane __c of the 128-bit vector __b.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
}

/* SQDMULL2 (high half of __a) by the replicated scalar __b.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
}

/* SQDMULL by lane __c of __b; __b is zero-extended to 128 bits because
   the lane builtin expects a 128-bit lane source.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
{
  int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0)));
  return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c);
}

/* SQDMULL by lane __c of the 128-bit vector __b.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
}

/* SQDMULL by the replicated scalar __b.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_n_s16 (int16x4_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
}

/* SQDMULL widening 32-bit lanes to 64-bit.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqdmullv2si (__a, __b);
}

/* SQDMULL2 on the high halves of __a and __b.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqdmull2v4si (__a, __b);
}

/* SQDMULL2 by lane __c of __b.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
}

/* SQDMULL2 by lane __c of the 128-bit vector __b.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
}

/* SQDMULL2 (high half of __a) by the replicated scalar __b.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
}

/* SQDMULL by lane __c of __b, zero-extended to 128 bits as above.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
{
  int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0)));
  return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c);
}

/* SQDMULL by lane __c of the 128-bit vector __b.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
{
  return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
}

/* SQDMULL by the replicated scalar __b.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_n_s32 (int32x2_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull_nv2si (__a, __b);
}

/* Scalar SQDMULL: 16-bit operands, 32-bit result.  */
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
}

/* Scalar SQDMULL by lane __c of __b.  */
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
}

/* Scalar SQDMULL: 32-bit operands, 64-bit result.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
}

/* Scalar SQDMULL by lane __c of __b.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
}
22145 
22146 /* vqmovn */
22147 
/* Signed saturating extract narrow (SQXTN), 16-bit to 8-bit lanes.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqmovn_s16 (int16x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
}

/* SQXTN, 32-bit to 16-bit lanes.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqmovn_s32 (int32x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
}

/* SQXTN, 64-bit to 32-bit lanes.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqmovn_s64 (int64x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
}

/* Unsigned saturating extract narrow (UQXTN), 16-bit to 8-bit lanes.
   The builtin is declared on signed vector types, hence the casts.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovn_u16 (uint16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
}

/* UQXTN, 32-bit to 16-bit lanes.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovn_u32 (uint32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
}

/* UQXTN, 64-bit to 32-bit lanes.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovn_u64 (uint64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
}

/* Scalar SQXTN, 16-bit to 8-bit.  */
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqmovnh_s16 (int16x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
}

/* Scalar SQXTN, 32-bit to 16-bit.  */
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqmovns_s32 (int32x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
}

/* Scalar SQXTN, 64-bit to 32-bit.  */
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqmovnd_s64 (int64x1_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
}

/* Scalar UQXTN, 16-bit to 8-bit.  */
__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqmovnh_u16 (uint16x1_t __a)
{
  return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
}

/* Scalar UQXTN, 32-bit to 16-bit.  */
__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqmovns_u32 (uint32x1_t __a)
{
  return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
}

/* Scalar UQXTN, 64-bit to 32-bit.  */
__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqmovnd_u64 (uint64x1_t __a)
{
  return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
}
22219 
22220 /* vqmovun */
22221 
/* Signed-to-unsigned saturating extract narrow (SQXTUN),
   16-bit to 8-bit lanes.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovun_s16 (int16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
}

/* SQXTUN, 32-bit to 16-bit lanes.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovun_s32 (int32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
}

/* SQXTUN, 64-bit to 32-bit lanes.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovun_s64 (int64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
}

/* Scalar SQXTUN, 16-bit to 8-bit.
   NOTE(review): the scalar forms below return signed types although the
   vector forms above (and ACLE) give unsigned results — confirm; kept
   as declared so existing callers are unaffected.  */
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqmovunh_s16 (int16x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
}

/* Scalar SQXTUN, 32-bit to 16-bit.  */
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqmovuns_s32 (int32x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
}

/* Scalar SQXTUN, 64-bit to 32-bit.  */
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqmovund_s64 (int64x1_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
}
22257 
22258 /* vqneg */
22259 
/* Signed saturating negate (SQNEG), 64-bit lanes.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqnegq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
}

/* Scalar SQNEG, 8-bit.  */
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqnegb_s8 (int8x1_t __a)
{
  return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
}

/* Scalar SQNEG, 16-bit.  */
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqnegh_s16 (int16x1_t __a)
{
  return (int16x1_t) __builtin_aarch64_sqneghi (__a);
}

/* Scalar SQNEG, 32-bit.  */
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqnegs_s32 (int32x1_t __a)
{
  return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
}
22283 
22284 /* vqrdmulh */
22285 
/* Saturating rounding doubling multiply returning high half (SQRDMULH),
   __a by lane __c of __b.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return  __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
}

/* SQRDMULH by lane __c of __b, 32-bit lanes.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
}

/* SQRDMULH by lane __c of __b, 128-bit first operand.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
}

/* SQRDMULH by lane __c of __b, 128-bit first operand, 32-bit lanes.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
}

/* Scalar SQRDMULH, 16-bit.  */
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
}

/* Scalar SQRDMULH by lane __c of __b.  */
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}

/* Scalar SQRDMULH, 32-bit.  */
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
}

/* Scalar SQRDMULH by lane __c of __b.  */
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}
22333 
22334 /* vqrshl */
22335 
/* Signed saturating rounding shift left (SQRSHL): each lane of __a is
   shifted by the corresponding (signed) lane of __b.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqrshlv8qi (__a, __b);
}

/* SQRSHL, 16-bit lanes.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqrshlv4hi (__a, __b);
}

/* SQRSHL, 32-bit lanes.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqrshlv2si (__a, __b);
}

/* SQRSHL, single 64-bit element.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

/* Unsigned saturating rounding shift left (UQRSHL); the shift counts in
   __b stay signed.  The builtin is declared on signed types, hence the
   casts.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
}

/* UQRSHL, 16-bit lanes.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
}

/* UQRSHL, 32-bit lanes.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
}

/* UQRSHL, single 64-bit element.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
}

/* SQRSHL, 128-bit forms.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqrshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqrshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqrshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqrshlv2di (__a, __b);
}

/* UQRSHL, 128-bit forms.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
}

/* Scalar SQRSHL forms.  */
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
{
  return __builtin_aarch64_sqrshlqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
{
  return __builtin_aarch64_sqrshlhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrshls_s32 (int32x1_t __a, int32x1_t __b)
{
  return __builtin_aarch64_sqrshlsi (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

/* Scalar UQRSHL forms.
   NOTE(review): the shift count __b is unsigned here, unlike the vector
   forms above where it is signed — confirm against ACLE; kept as
   declared so existing callers are unaffected.  */
__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
{
  return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
{
  return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
{
  return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
}
22479 
22480 /* vqrshrn */
22481 
/* Signed saturating rounded shift right narrow by immediate __b
   (SQRSHRN), 16-bit to 8-bit lanes.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
}

/* SQRSHRN, 32-bit to 16-bit lanes.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
}

/* SQRSHRN, 64-bit to 32-bit lanes.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
}

/* Unsigned saturating rounded shift right narrow (UQRSHRN), 16-bit to
   8-bit lanes.  The builtin is declared on signed types, hence the
   casts.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
}

/* UQRSHRN, 32-bit to 16-bit lanes.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
}

/* UQRSHRN, 64-bit to 32-bit lanes.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
}

/* Scalar SQRSHRN, 16-bit to 8-bit.  */
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqrshrnh_n_s16 (int16x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
}

/* Scalar SQRSHRN, 32-bit to 16-bit.  */
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrshrns_n_s32 (int32x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
}
22529 
22530 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrshrnd_n_s64(int64x1_t __a,const int __b)22531 vqrshrnd_n_s64 (int64x1_t __a, const int __b)
22532 {
22533   return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
22534 }
22535 
22536 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqrshrnh_n_u16(uint16x1_t __a,const int __b)22537 vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
22538 {
22539   return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
22540 }
22541 
22542 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqrshrns_n_u32(uint32x1_t __a,const int __b)22543 vqrshrns_n_u32 (uint32x1_t __a, const int __b)
22544 {
22545   return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
22546 }
22547 
22548 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqrshrnd_n_u64(uint64x1_t __a,const int __b)22549 vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
22550 {
22551   return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
22552 }
22553 
22554 /* vqrshrun */
22555 
22556 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrun_n_s16(int16x8_t __a,const int __b)22557 vqrshrun_n_s16 (int16x8_t __a, const int __b)
22558 {
22559   return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
22560 }
22561 
22562 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrun_n_s32(int32x4_t __a,const int __b)22563 vqrshrun_n_s32 (int32x4_t __a, const int __b)
22564 {
22565   return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
22566 }
22567 
22568 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrun_n_s64(int64x2_t __a,const int __b)22569 vqrshrun_n_s64 (int64x2_t __a, const int __b)
22570 {
22571   return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
22572 }
22573 
22574 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqrshrunh_n_s16(int16x1_t __a,const int __b)22575 vqrshrunh_n_s16 (int16x1_t __a, const int __b)
22576 {
22577   return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
22578 }
22579 
22580 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrshruns_n_s32(int32x1_t __a,const int __b)22581 vqrshruns_n_s32 (int32x1_t __a, const int __b)
22582 {
22583   return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
22584 }
22585 
22586 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrshrund_n_s64(int64x1_t __a,const int __b)22587 vqrshrund_n_s64 (int64x1_t __a, const int __b)
22588 {
22589   return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
22590 }
22591 
22592 /* vqshl */
22593 
22594 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_s8(int8x8_t __a,int8x8_t __b)22595 vqshl_s8 (int8x8_t __a, int8x8_t __b)
22596 {
22597   return __builtin_aarch64_sqshlv8qi (__a, __b);
22598 }
22599 
22600 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_s16(int16x4_t __a,int16x4_t __b)22601 vqshl_s16 (int16x4_t __a, int16x4_t __b)
22602 {
22603   return __builtin_aarch64_sqshlv4hi (__a, __b);
22604 }
22605 
22606 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_s32(int32x2_t __a,int32x2_t __b)22607 vqshl_s32 (int32x2_t __a, int32x2_t __b)
22608 {
22609   return __builtin_aarch64_sqshlv2si (__a, __b);
22610 }
22611 
22612 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_s64(int64x1_t __a,int64x1_t __b)22613 vqshl_s64 (int64x1_t __a, int64x1_t __b)
22614 {
22615   return __builtin_aarch64_sqshldi (__a, __b);
22616 }
22617 
22618 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_u8(uint8x8_t __a,int8x8_t __b)22619 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
22620 {
22621   return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
22622 }
22623 
22624 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_u16(uint16x4_t __a,int16x4_t __b)22625 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
22626 {
22627   return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
22628 }
22629 
22630 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_u32(uint32x2_t __a,int32x2_t __b)22631 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
22632 {
22633   return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
22634 }
22635 
22636 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_u64(uint64x1_t __a,int64x1_t __b)22637 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
22638 {
22639   return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
22640 }
22641 
22642 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_s8(int8x16_t __a,int8x16_t __b)22643 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
22644 {
22645   return __builtin_aarch64_sqshlv16qi (__a, __b);
22646 }
22647 
22648 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_s16(int16x8_t __a,int16x8_t __b)22649 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
22650 {
22651   return __builtin_aarch64_sqshlv8hi (__a, __b);
22652 }
22653 
22654 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_s32(int32x4_t __a,int32x4_t __b)22655 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
22656 {
22657   return __builtin_aarch64_sqshlv4si (__a, __b);
22658 }
22659 
22660 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_s64(int64x2_t __a,int64x2_t __b)22661 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
22662 {
22663   return __builtin_aarch64_sqshlv2di (__a, __b);
22664 }
22665 
22666 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_u8(uint8x16_t __a,int8x16_t __b)22667 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
22668 {
22669   return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
22670 }
22671 
22672 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_u16(uint16x8_t __a,int16x8_t __b)22673 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
22674 {
22675   return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
22676 }
22677 
22678 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_u32(uint32x4_t __a,int32x4_t __b)22679 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
22680 {
22681   return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
22682 }
22683 
22684 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_u64(uint64x2_t __a,int64x2_t __b)22685 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
22686 {
22687   return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
22688 }
22689 
22690 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshlb_s8(int8x1_t __a,int8x1_t __b)22691 vqshlb_s8 (int8x1_t __a, int8x1_t __b)
22692 {
22693   return __builtin_aarch64_sqshlqi (__a, __b);
22694 }
22695 
22696 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshlh_s16(int16x1_t __a,int16x1_t __b)22697 vqshlh_s16 (int16x1_t __a, int16x1_t __b)
22698 {
22699   return __builtin_aarch64_sqshlhi (__a, __b);
22700 }
22701 
22702 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshls_s32(int32x1_t __a,int32x1_t __b)22703 vqshls_s32 (int32x1_t __a, int32x1_t __b)
22704 {
22705   return __builtin_aarch64_sqshlsi (__a, __b);
22706 }
22707 
22708 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshld_s64(int64x1_t __a,int64x1_t __b)22709 vqshld_s64 (int64x1_t __a, int64x1_t __b)
22710 {
22711   return __builtin_aarch64_sqshldi (__a, __b);
22712 }
22713 
22714 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqshlb_u8(uint8x1_t __a,uint8x1_t __b)22715 vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
22716 {
22717   return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b);
22718 }
22719 
22720 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqshlh_u16(uint16x1_t __a,uint16x1_t __b)22721 vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
22722 {
22723   return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b);
22724 }
22725 
22726 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqshls_u32(uint32x1_t __a,uint32x1_t __b)22727 vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
22728 {
22729   return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b);
22730 }
22731 
22732 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshld_u64(uint64x1_t __a,uint64x1_t __b)22733 vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
22734 {
22735   return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
22736 }
22737 
22738 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_n_s8(int8x8_t __a,const int __b)22739 vqshl_n_s8 (int8x8_t __a, const int __b)
22740 {
22741   return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
22742 }
22743 
22744 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_n_s16(int16x4_t __a,const int __b)22745 vqshl_n_s16 (int16x4_t __a, const int __b)
22746 {
22747   return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
22748 }
22749 
22750 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_n_s32(int32x2_t __a,const int __b)22751 vqshl_n_s32 (int32x2_t __a, const int __b)
22752 {
22753   return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
22754 }
22755 
22756 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_n_s64(int64x1_t __a,const int __b)22757 vqshl_n_s64 (int64x1_t __a, const int __b)
22758 {
22759   return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22760 }
22761 
22762 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_n_u8(uint8x8_t __a,const int __b)22763 vqshl_n_u8 (uint8x8_t __a, const int __b)
22764 {
22765   return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
22766 }
22767 
22768 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_n_u16(uint16x4_t __a,const int __b)22769 vqshl_n_u16 (uint16x4_t __a, const int __b)
22770 {
22771   return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
22772 }
22773 
22774 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_n_u32(uint32x2_t __a,const int __b)22775 vqshl_n_u32 (uint32x2_t __a, const int __b)
22776 {
22777   return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
22778 }
22779 
22780 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_n_u64(uint64x1_t __a,const int __b)22781 vqshl_n_u64 (uint64x1_t __a, const int __b)
22782 {
22783   return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
22784 }
22785 
22786 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_n_s8(int8x16_t __a,const int __b)22787 vqshlq_n_s8 (int8x16_t __a, const int __b)
22788 {
22789   return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
22790 }
22791 
22792 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_n_s16(int16x8_t __a,const int __b)22793 vqshlq_n_s16 (int16x8_t __a, const int __b)
22794 {
22795   return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
22796 }
22797 
22798 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_n_s32(int32x4_t __a,const int __b)22799 vqshlq_n_s32 (int32x4_t __a, const int __b)
22800 {
22801   return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
22802 }
22803 
22804 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_n_s64(int64x2_t __a,const int __b)22805 vqshlq_n_s64 (int64x2_t __a, const int __b)
22806 {
22807   return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
22808 }
22809 
22810 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_n_u8(uint8x16_t __a,const int __b)22811 vqshlq_n_u8 (uint8x16_t __a, const int __b)
22812 {
22813   return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
22814 }
22815 
22816 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_n_u16(uint16x8_t __a,const int __b)22817 vqshlq_n_u16 (uint16x8_t __a, const int __b)
22818 {
22819   return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
22820 }
22821 
22822 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_n_u32(uint32x4_t __a,const int __b)22823 vqshlq_n_u32 (uint32x4_t __a, const int __b)
22824 {
22825   return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
22826 }
22827 
22828 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_n_u64(uint64x2_t __a,const int __b)22829 vqshlq_n_u64 (uint64x2_t __a, const int __b)
22830 {
22831   return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
22832 }
22833 
22834 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshlb_n_s8(int8x1_t __a,const int __b)22835 vqshlb_n_s8 (int8x1_t __a, const int __b)
22836 {
22837   return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
22838 }
22839 
22840 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshlh_n_s16(int16x1_t __a,const int __b)22841 vqshlh_n_s16 (int16x1_t __a, const int __b)
22842 {
22843   return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
22844 }
22845 
22846 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshls_n_s32(int32x1_t __a,const int __b)22847 vqshls_n_s32 (int32x1_t __a, const int __b)
22848 {
22849   return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
22850 }
22851 
22852 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshld_n_s64(int64x1_t __a,const int __b)22853 vqshld_n_s64 (int64x1_t __a, const int __b)
22854 {
22855   return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22856 }
22857 
22858 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqshlb_n_u8(uint8x1_t __a,const int __b)22859 vqshlb_n_u8 (uint8x1_t __a, const int __b)
22860 {
22861   return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b);
22862 }
22863 
22864 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqshlh_n_u16(uint16x1_t __a,const int __b)22865 vqshlh_n_u16 (uint16x1_t __a, const int __b)
22866 {
22867   return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b);
22868 }
22869 
22870 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqshls_n_u32(uint32x1_t __a,const int __b)22871 vqshls_n_u32 (uint32x1_t __a, const int __b)
22872 {
22873   return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b);
22874 }
22875 
22876 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshld_n_u64(uint64x1_t __a,const int __b)22877 vqshld_n_u64 (uint64x1_t __a, const int __b)
22878 {
22879   return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
22880 }
22881 
22882 /* vqshlu */
22883 
22884 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshlu_n_s8(int8x8_t __a,const int __b)22885 vqshlu_n_s8 (int8x8_t __a, const int __b)
22886 {
22887   return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
22888 }
22889 
22890 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshlu_n_s16(int16x4_t __a,const int __b)22891 vqshlu_n_s16 (int16x4_t __a, const int __b)
22892 {
22893   return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
22894 }
22895 
22896 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshlu_n_s32(int32x2_t __a,const int __b)22897 vqshlu_n_s32 (int32x2_t __a, const int __b)
22898 {
22899   return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
22900 }
22901 
22902 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshlu_n_s64(int64x1_t __a,const int __b)22903 vqshlu_n_s64 (int64x1_t __a, const int __b)
22904 {
22905   return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22906 }
22907 
22908 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshluq_n_s8(int8x16_t __a,const int __b)22909 vqshluq_n_s8 (int8x16_t __a, const int __b)
22910 {
22911   return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
22912 }
22913 
22914 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshluq_n_s16(int16x8_t __a,const int __b)22915 vqshluq_n_s16 (int16x8_t __a, const int __b)
22916 {
22917   return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
22918 }
22919 
22920 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshluq_n_s32(int32x4_t __a,const int __b)22921 vqshluq_n_s32 (int32x4_t __a, const int __b)
22922 {
22923   return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
22924 }
22925 
22926 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshluq_n_s64(int64x2_t __a,const int __b)22927 vqshluq_n_s64 (int64x2_t __a, const int __b)
22928 {
22929   return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
22930 }
22931 
22932 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshlub_n_s8(int8x1_t __a,const int __b)22933 vqshlub_n_s8 (int8x1_t __a, const int __b)
22934 {
22935   return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
22936 }
22937 
22938 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshluh_n_s16(int16x1_t __a,const int __b)22939 vqshluh_n_s16 (int16x1_t __a, const int __b)
22940 {
22941   return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
22942 }
22943 
22944 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshlus_n_s32(int32x1_t __a,const int __b)22945 vqshlus_n_s32 (int32x1_t __a, const int __b)
22946 {
22947   return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
22948 }
22949 
22950 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshlud_n_s64(int64x1_t __a,const int __b)22951 vqshlud_n_s64 (int64x1_t __a, const int __b)
22952 {
22953   return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22954 }
22955 
22956 /* vqshrn */
22957 
22958 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshrn_n_s16(int16x8_t __a,const int __b)22959 vqshrn_n_s16 (int16x8_t __a, const int __b)
22960 {
22961   return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
22962 }
22963 
22964 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshrn_n_s32(int32x4_t __a,const int __b)22965 vqshrn_n_s32 (int32x4_t __a, const int __b)
22966 {
22967   return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
22968 }
22969 
22970 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshrn_n_s64(int64x2_t __a,const int __b)22971 vqshrn_n_s64 (int64x2_t __a, const int __b)
22972 {
22973   return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
22974 }
22975 
22976 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrn_n_u16(uint16x8_t __a,const int __b)22977 vqshrn_n_u16 (uint16x8_t __a, const int __b)
22978 {
22979   return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
22980 }
22981 
22982 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrn_n_u32(uint32x4_t __a,const int __b)22983 vqshrn_n_u32 (uint32x4_t __a, const int __b)
22984 {
22985   return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
22986 }
22987 
22988 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrn_n_u64(uint64x2_t __a,const int __b)22989 vqshrn_n_u64 (uint64x2_t __a, const int __b)
22990 {
22991   return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
22992 }
22993 
22994 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshrnh_n_s16(int16x1_t __a,const int __b)22995 vqshrnh_n_s16 (int16x1_t __a, const int __b)
22996 {
22997   return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
22998 }
22999 
23000 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshrns_n_s32(int32x1_t __a,const int __b)23001 vqshrns_n_s32 (int32x1_t __a, const int __b)
23002 {
23003   return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
23004 }
23005 
23006 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshrnd_n_s64(int64x1_t __a,const int __b)23007 vqshrnd_n_s64 (int64x1_t __a, const int __b)
23008 {
23009   return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
23010 }
23011 
23012 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqshrnh_n_u16(uint16x1_t __a,const int __b)23013 vqshrnh_n_u16 (uint16x1_t __a, const int __b)
23014 {
23015   return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
23016 }
23017 
23018 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqshrns_n_u32(uint32x1_t __a,const int __b)23019 vqshrns_n_u32 (uint32x1_t __a, const int __b)
23020 {
23021   return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
23022 }
23023 
23024 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqshrnd_n_u64(uint64x1_t __a,const int __b)23025 vqshrnd_n_u64 (uint64x1_t __a, const int __b)
23026 {
23027   return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
23028 }
23029 
23030 /* vqshrun */
23031 
23032 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrun_n_s16(int16x8_t __a,const int __b)23033 vqshrun_n_s16 (int16x8_t __a, const int __b)
23034 {
23035   return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
23036 }
23037 
23038 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrun_n_s32(int32x4_t __a,const int __b)23039 vqshrun_n_s32 (int32x4_t __a, const int __b)
23040 {
23041   return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
23042 }
23043 
23044 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrun_n_s64(int64x2_t __a,const int __b)23045 vqshrun_n_s64 (int64x2_t __a, const int __b)
23046 {
23047   return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
23048 }
23049 
23050 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshrunh_n_s16(int16x1_t __a,const int __b)23051 vqshrunh_n_s16 (int16x1_t __a, const int __b)
23052 {
23053   return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
23054 }
23055 
23056 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshruns_n_s32(int32x1_t __a,const int __b)23057 vqshruns_n_s32 (int32x1_t __a, const int __b)
23058 {
23059   return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
23060 }
23061 
23062 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshrund_n_s64(int64x1_t __a,const int __b)23063 vqshrund_n_s64 (int64x1_t __a, const int __b)
23064 {
23065   return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
23066 }
23067 
23068 /* vqsub */
23069 
23070 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqsubb_s8(int8x1_t __a,int8x1_t __b)23071 vqsubb_s8 (int8x1_t __a, int8x1_t __b)
23072 {
23073   return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
23074 }
23075 
23076 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqsubh_s16(int16x1_t __a,int16x1_t __b)23077 vqsubh_s16 (int16x1_t __a, int16x1_t __b)
23078 {
23079   return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
23080 }
23081 
23082 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqsubs_s32(int32x1_t __a,int32x1_t __b)23083 vqsubs_s32 (int32x1_t __a, int32x1_t __b)
23084 {
23085   return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
23086 }
23087 
23088 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqsubd_s64(int64x1_t __a,int64x1_t __b)23089 vqsubd_s64 (int64x1_t __a, int64x1_t __b)
23090 {
23091   return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
23092 }
23093 
23094 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqsubb_u8(uint8x1_t __a,uint8x1_t __b)23095 vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
23096 {
23097   return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b);
23098 }
23099 
23100 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqsubh_u16(uint16x1_t __a,uint16x1_t __b)23101 vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
23102 {
23103   return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b);
23104 }
23105 
23106 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqsubs_u32(uint32x1_t __a,uint32x1_t __b)23107 vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
23108 {
23109   return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b);
23110 }
23111 
23112 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqsubd_u64(uint64x1_t __a,uint64x1_t __b)23113 vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
23114 {
23115   return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
23116 }
23117 
23118 /* vrshl */
23119 
23120 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshl_s8(int8x8_t __a,int8x8_t __b)23121 vrshl_s8 (int8x8_t __a, int8x8_t __b)
23122 {
23123   return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
23124 }
23125 
23126 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshl_s16(int16x4_t __a,int16x4_t __b)23127 vrshl_s16 (int16x4_t __a, int16x4_t __b)
23128 {
23129   return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
23130 }
23131 
23132 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshl_s32(int32x2_t __a,int32x2_t __b)23133 vrshl_s32 (int32x2_t __a, int32x2_t __b)
23134 {
23135   return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
23136 }
23137 
23138 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshl_s64(int64x1_t __a,int64x1_t __b)23139 vrshl_s64 (int64x1_t __a, int64x1_t __b)
23140 {
23141   return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
23142 }
23143 
23144 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshl_u8(uint8x8_t __a,int8x8_t __b)23145 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
23146 {
23147   return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b);
23148 }
23149 
23150 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshl_u16(uint16x4_t __a,int16x4_t __b)23151 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
23152 {
23153   return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b);
23154 }
23155 
23156 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshl_u32(uint32x2_t __a,int32x2_t __b)23157 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
23158 {
23159   return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b);
23160 }
23161 
23162 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshl_u64(uint64x1_t __a,int64x1_t __b)23163 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
23164 {
23165   return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b);
23166 }
23167 
23168 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshlq_s8(int8x16_t __a,int8x16_t __b)23169 vrshlq_s8 (int8x16_t __a, int8x16_t __b)
23170 {
23171   return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
23172 }
23173 
23174 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshlq_s16(int16x8_t __a,int16x8_t __b)23175 vrshlq_s16 (int16x8_t __a, int16x8_t __b)
23176 {
23177   return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
23178 }
23179 
23180 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshlq_s32(int32x4_t __a,int32x4_t __b)23181 vrshlq_s32 (int32x4_t __a, int32x4_t __b)
23182 {
23183   return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
23184 }
23185 
23186 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshlq_s64(int64x2_t __a,int64x2_t __b)23187 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
23188 {
23189   return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
23190 }
23191 
23192 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshlq_u8(uint8x16_t __a,int8x16_t __b)23193 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
23194 {
23195   return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b);
23196 }
23197 
23198 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshlq_u16(uint16x8_t __a,int16x8_t __b)23199 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
23200 {
23201   return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b);
23202 }
23203 
23204 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshlq_u32(uint32x4_t __a,int32x4_t __b)23205 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
23206 {
23207   return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b);
23208 }
23209 
23210 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshlq_u64(uint64x2_t __a,int64x2_t __b)23211 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
23212 {
23213   return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b);
23214 }
23215 
23216 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshld_s64(int64x1_t __a,int64x1_t __b)23217 vrshld_s64 (int64x1_t __a, int64x1_t __b)
23218 {
23219   return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
23220 }
23221 
23222 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshld_u64(uint64x1_t __a,uint64x1_t __b)23223 vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
23224 {
23225   return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b);
23226 }
23227 
23228 /* vrshr */
23229 
23230 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshr_n_s8(int8x8_t __a,const int __b)23231 vrshr_n_s8 (int8x8_t __a, const int __b)
23232 {
23233   return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
23234 }
23235 
23236 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshr_n_s16(int16x4_t __a,const int __b)23237 vrshr_n_s16 (int16x4_t __a, const int __b)
23238 {
23239   return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
23240 }
23241 
23242 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshr_n_s32(int32x2_t __a,const int __b)23243 vrshr_n_s32 (int32x2_t __a, const int __b)
23244 {
23245   return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
23246 }
23247 
23248 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshr_n_s64(int64x1_t __a,const int __b)23249 vrshr_n_s64 (int64x1_t __a, const int __b)
23250 {
23251   return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
23252 }
23253 
23254 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshr_n_u8(uint8x8_t __a,const int __b)23255 vrshr_n_u8 (uint8x8_t __a, const int __b)
23256 {
23257   return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
23258 }
23259 
23260 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshr_n_u16(uint16x4_t __a,const int __b)23261 vrshr_n_u16 (uint16x4_t __a, const int __b)
23262 {
23263   return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
23264 }
23265 
23266 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshr_n_u32(uint32x2_t __a,const int __b)23267 vrshr_n_u32 (uint32x2_t __a, const int __b)
23268 {
23269   return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
23270 }
23271 
23272 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshr_n_u64(uint64x1_t __a,const int __b)23273 vrshr_n_u64 (uint64x1_t __a, const int __b)
23274 {
23275   return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
23276 }
23277 
23278 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshrq_n_s8(int8x16_t __a,const int __b)23279 vrshrq_n_s8 (int8x16_t __a, const int __b)
23280 {
23281   return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
23282 }
23283 
23284 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshrq_n_s16(int16x8_t __a,const int __b)23285 vrshrq_n_s16 (int16x8_t __a, const int __b)
23286 {
23287   return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
23288 }
23289 
23290 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshrq_n_s32(int32x4_t __a,const int __b)23291 vrshrq_n_s32 (int32x4_t __a, const int __b)
23292 {
23293   return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
23294 }
23295 
23296 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshrq_n_s64(int64x2_t __a,const int __b)23297 vrshrq_n_s64 (int64x2_t __a, const int __b)
23298 {
23299   return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
23300 }
23301 
23302 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshrq_n_u8(uint8x16_t __a,const int __b)23303 vrshrq_n_u8 (uint8x16_t __a, const int __b)
23304 {
23305   return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
23306 }
23307 
23308 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshrq_n_u16(uint16x8_t __a,const int __b)23309 vrshrq_n_u16 (uint16x8_t __a, const int __b)
23310 {
23311   return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
23312 }
23313 
23314 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshrq_n_u32(uint32x4_t __a,const int __b)23315 vrshrq_n_u32 (uint32x4_t __a, const int __b)
23316 {
23317   return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
23318 }
23319 
23320 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshrq_n_u64(uint64x2_t __a,const int __b)23321 vrshrq_n_u64 (uint64x2_t __a, const int __b)
23322 {
23323   return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
23324 }
23325 
23326 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshrd_n_s64(int64x1_t __a,const int __b)23327 vrshrd_n_s64 (int64x1_t __a, const int __b)
23328 {
23329   return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
23330 }
23331 
23332 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshrd_n_u64(uint64x1_t __a,const int __b)23333 vrshrd_n_u64 (uint64x1_t __a, const int __b)
23334 {
23335   return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
23336 }
23337 
23338 /* vrsra */
23339 
23340 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrsra_n_s8(int8x8_t __a,int8x8_t __b,const int __c)23341 vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23342 {
23343   return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
23344 }
23345 
23346 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrsra_n_s16(int16x4_t __a,int16x4_t __b,const int __c)23347 vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23348 {
23349   return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
23350 }
23351 
23352 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrsra_n_s32(int32x2_t __a,int32x2_t __b,const int __c)23353 vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23354 {
23355   return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
23356 }
23357 
23358 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsra_n_s64(int64x1_t __a,int64x1_t __b,const int __c)23359 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23360 {
23361   return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
23362 }
23363 
23364 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrsra_n_u8(uint8x8_t __a,uint8x8_t __b,const int __c)23365 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23366 {
23367   return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
23368 						    (int8x8_t) __b, __c);
23369 }
23370 
23371 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrsra_n_u16(uint16x4_t __a,uint16x4_t __b,const int __c)23372 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23373 {
23374   return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
23375 						     (int16x4_t) __b, __c);
23376 }
23377 
23378 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsra_n_u32(uint32x2_t __a,uint32x2_t __b,const int __c)23379 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23380 {
23381   return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
23382 						     (int32x2_t) __b, __c);
23383 }
23384 
23385 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrsra_n_u64(uint64x1_t __a,uint64x1_t __b,const int __c)23386 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23387 {
23388   return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
23389 						   (int64x1_t) __b, __c);
23390 }
23391 
23392 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsraq_n_s8(int8x16_t __a,int8x16_t __b,const int __c)23393 vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23394 {
23395   return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
23396 }
23397 
23398 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsraq_n_s16(int16x8_t __a,int16x8_t __b,const int __c)23399 vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23400 {
23401   return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
23402 }
23403 
23404 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsraq_n_s32(int32x4_t __a,int32x4_t __b,const int __c)23405 vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23406 {
23407   return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
23408 }
23409 
23410 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrsraq_n_s64(int64x2_t __a,int64x2_t __b,const int __c)23411 vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23412 {
23413   return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
23414 }
23415 
23416 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsraq_n_u8(uint8x16_t __a,uint8x16_t __b,const int __c)23417 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23418 {
23419   return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
23420 						      (int8x16_t) __b, __c);
23421 }
23422 
23423 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsraq_n_u16(uint16x8_t __a,uint16x8_t __b,const int __c)23424 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23425 {
23426   return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
23427 						     (int16x8_t) __b, __c);
23428 }
23429 
23430 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsraq_n_u32(uint32x4_t __a,uint32x4_t __b,const int __c)23431 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23432 {
23433   return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
23434 						     (int32x4_t) __b, __c);
23435 }
23436 
23437 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrsraq_n_u64(uint64x2_t __a,uint64x2_t __b,const int __c)23438 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23439 {
23440   return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
23441 						     (int64x2_t) __b, __c);
23442 }
23443 
23444 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsrad_n_s64(int64x1_t __a,int64x1_t __b,const int __c)23445 vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23446 {
23447   return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
23448 }
23449 
23450 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrsrad_n_u64(uint64x1_t __a,uint64x1_t __b,const int __c)23451 vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23452 {
23453   return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
23454 }
23455 
23456 /* vshl */
23457 
23458 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_n_s8(int8x8_t __a,const int __b)23459 vshl_n_s8 (int8x8_t __a, const int __b)
23460 {
23461   return (int8x8_t) __builtin_aarch64_sshl_nv8qi (__a, __b);
23462 }
23463 
23464 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_n_s16(int16x4_t __a,const int __b)23465 vshl_n_s16 (int16x4_t __a, const int __b)
23466 {
23467   return (int16x4_t) __builtin_aarch64_sshl_nv4hi (__a, __b);
23468 }
23469 
23470 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_n_s32(int32x2_t __a,const int __b)23471 vshl_n_s32 (int32x2_t __a, const int __b)
23472 {
23473   return (int32x2_t) __builtin_aarch64_sshl_nv2si (__a, __b);
23474 }
23475 
23476 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_n_s64(int64x1_t __a,const int __b)23477 vshl_n_s64 (int64x1_t __a, const int __b)
23478 {
23479   return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b);
23480 }
23481 
23482 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_n_u8(uint8x8_t __a,const int __b)23483 vshl_n_u8 (uint8x8_t __a, const int __b)
23484 {
23485   return (uint8x8_t) __builtin_aarch64_ushl_nv8qi ((int8x8_t) __a, __b);
23486 }
23487 
23488 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_n_u16(uint16x4_t __a,const int __b)23489 vshl_n_u16 (uint16x4_t __a, const int __b)
23490 {
23491   return (uint16x4_t) __builtin_aarch64_ushl_nv4hi ((int16x4_t) __a, __b);
23492 }
23493 
23494 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_n_u32(uint32x2_t __a,const int __b)23495 vshl_n_u32 (uint32x2_t __a, const int __b)
23496 {
23497   return (uint32x2_t) __builtin_aarch64_ushl_nv2si ((int32x2_t) __a, __b);
23498 }
23499 
23500 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_n_u64(uint64x1_t __a,const int __b)23501 vshl_n_u64 (uint64x1_t __a, const int __b)
23502 {
23503   return (uint64x1_t) __builtin_aarch64_ushl_ndi ((int64x1_t) __a, __b);
23504 }
23505 
23506 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_n_s8(int8x16_t __a,const int __b)23507 vshlq_n_s8 (int8x16_t __a, const int __b)
23508 {
23509   return (int8x16_t) __builtin_aarch64_sshl_nv16qi (__a, __b);
23510 }
23511 
23512 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_n_s16(int16x8_t __a,const int __b)23513 vshlq_n_s16 (int16x8_t __a, const int __b)
23514 {
23515   return (int16x8_t) __builtin_aarch64_sshl_nv8hi (__a, __b);
23516 }
23517 
23518 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_n_s32(int32x4_t __a,const int __b)23519 vshlq_n_s32 (int32x4_t __a, const int __b)
23520 {
23521   return (int32x4_t) __builtin_aarch64_sshl_nv4si (__a, __b);
23522 }
23523 
23524 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_n_s64(int64x2_t __a,const int __b)23525 vshlq_n_s64 (int64x2_t __a, const int __b)
23526 {
23527   return (int64x2_t) __builtin_aarch64_sshl_nv2di (__a, __b);
23528 }
23529 
23530 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_n_u8(uint8x16_t __a,const int __b)23531 vshlq_n_u8 (uint8x16_t __a, const int __b)
23532 {
23533   return (uint8x16_t) __builtin_aarch64_ushl_nv16qi ((int8x16_t) __a, __b);
23534 }
23535 
23536 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_n_u16(uint16x8_t __a,const int __b)23537 vshlq_n_u16 (uint16x8_t __a, const int __b)
23538 {
23539   return (uint16x8_t) __builtin_aarch64_ushl_nv8hi ((int16x8_t) __a, __b);
23540 }
23541 
23542 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_n_u32(uint32x4_t __a,const int __b)23543 vshlq_n_u32 (uint32x4_t __a, const int __b)
23544 {
23545   return (uint32x4_t) __builtin_aarch64_ushl_nv4si ((int32x4_t) __a, __b);
23546 }
23547 
23548 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_n_u64(uint64x2_t __a,const int __b)23549 vshlq_n_u64 (uint64x2_t __a, const int __b)
23550 {
23551   return (uint64x2_t) __builtin_aarch64_ushl_nv2di ((int64x2_t) __a, __b);
23552 }
23553 
23554 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshld_n_s64(int64x1_t __a,const int __b)23555 vshld_n_s64 (int64x1_t __a, const int __b)
23556 {
23557   return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b);
23558 }
23559 
23560 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshld_n_u64(uint64x1_t __a,const int __b)23561 vshld_n_u64 (uint64x1_t __a, const int __b)
23562 {
23563   return (uint64x1_t) __builtin_aarch64_ushl_ndi (__a, __b);
23564 }
23565 
23566 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_s8(int8x8_t __a,int8x8_t __b)23567 vshl_s8 (int8x8_t __a, int8x8_t __b)
23568 {
23569   return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
23570 }
23571 
23572 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_s16(int16x4_t __a,int16x4_t __b)23573 vshl_s16 (int16x4_t __a, int16x4_t __b)
23574 {
23575   return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
23576 }
23577 
23578 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_s32(int32x2_t __a,int32x2_t __b)23579 vshl_s32 (int32x2_t __a, int32x2_t __b)
23580 {
23581   return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
23582 }
23583 
23584 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_s64(int64x1_t __a,int64x1_t __b)23585 vshl_s64 (int64x1_t __a, int64x1_t __b)
23586 {
23587   return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23588 }
23589 
23590 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_u8(uint8x8_t __a,int8x8_t __b)23591 vshl_u8 (uint8x8_t __a, int8x8_t __b)
23592 {
23593   return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
23594 }
23595 
23596 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_u16(uint16x4_t __a,int16x4_t __b)23597 vshl_u16 (uint16x4_t __a, int16x4_t __b)
23598 {
23599   return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
23600 }
23601 
23602 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_u32(uint32x2_t __a,int32x2_t __b)23603 vshl_u32 (uint32x2_t __a, int32x2_t __b)
23604 {
23605   return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
23606 }
23607 
23608 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_u64(uint64x1_t __a,int64x1_t __b)23609 vshl_u64 (uint64x1_t __a, int64x1_t __b)
23610 {
23611   return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
23612 }
23613 
23614 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_s8(int8x16_t __a,int8x16_t __b)23615 vshlq_s8 (int8x16_t __a, int8x16_t __b)
23616 {
23617   return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
23618 }
23619 
23620 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_s16(int16x8_t __a,int16x8_t __b)23621 vshlq_s16 (int16x8_t __a, int16x8_t __b)
23622 {
23623   return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
23624 }
23625 
23626 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_s32(int32x4_t __a,int32x4_t __b)23627 vshlq_s32 (int32x4_t __a, int32x4_t __b)
23628 {
23629   return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
23630 }
23631 
23632 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_s64(int64x2_t __a,int64x2_t __b)23633 vshlq_s64 (int64x2_t __a, int64x2_t __b)
23634 {
23635   return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
23636 }
23637 
23638 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_u8(uint8x16_t __a,int8x16_t __b)23639 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
23640 {
23641   return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
23642 }
23643 
23644 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_u16(uint16x8_t __a,int16x8_t __b)23645 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
23646 {
23647   return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
23648 }
23649 
23650 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_u32(uint32x4_t __a,int32x4_t __b)23651 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
23652 {
23653   return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
23654 }
23655 
23656 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_u64(uint64x2_t __a,int64x2_t __b)23657 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
23658 {
23659   return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
23660 }
23661 
23662 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshld_s64(int64x1_t __a,int64x1_t __b)23663 vshld_s64 (int64x1_t __a, int64x1_t __b)
23664 {
23665   return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23666 }
23667 
23668 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshld_u64(uint64x1_t __a,uint64x1_t __b)23669 vshld_u64 (uint64x1_t __a, uint64x1_t __b)
23670 {
23671   return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
23672 }
23673 
23674 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_high_n_s8(int8x16_t __a,const int __b)23675 vshll_high_n_s8 (int8x16_t __a, const int __b)
23676 {
23677   return __builtin_aarch64_sshll2_nv16qi (__a, __b);
23678 }
23679 
23680 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_high_n_s16(int16x8_t __a,const int __b)23681 vshll_high_n_s16 (int16x8_t __a, const int __b)
23682 {
23683   return __builtin_aarch64_sshll2_nv8hi (__a, __b);
23684 }
23685 
23686 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_high_n_s32(int32x4_t __a,const int __b)23687 vshll_high_n_s32 (int32x4_t __a, const int __b)
23688 {
23689   return __builtin_aarch64_sshll2_nv4si (__a, __b);
23690 }
23691 
23692 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_high_n_u8(uint8x16_t __a,const int __b)23693 vshll_high_n_u8 (uint8x16_t __a, const int __b)
23694 {
23695   return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
23696 }
23697 
23698 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_high_n_u16(uint16x8_t __a,const int __b)23699 vshll_high_n_u16 (uint16x8_t __a, const int __b)
23700 {
23701   return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
23702 }
23703 
23704 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_high_n_u32(uint32x4_t __a,const int __b)23705 vshll_high_n_u32 (uint32x4_t __a, const int __b)
23706 {
23707   return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
23708 }
23709 
23710 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_n_s8(int8x8_t __a,const int __b)23711 vshll_n_s8 (int8x8_t __a, const int __b)
23712 {
23713   return __builtin_aarch64_sshll_nv8qi (__a, __b);
23714 }
23715 
23716 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_n_s16(int16x4_t __a,const int __b)23717 vshll_n_s16 (int16x4_t __a, const int __b)
23718 {
23719   return __builtin_aarch64_sshll_nv4hi (__a, __b);
23720 }
23721 
23722 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_n_s32(int32x2_t __a,const int __b)23723 vshll_n_s32 (int32x2_t __a, const int __b)
23724 {
23725   return __builtin_aarch64_sshll_nv2si (__a, __b);
23726 }
23727 
23728 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_n_u8(uint8x8_t __a,const int __b)23729 vshll_n_u8 (uint8x8_t __a, const int __b)
23730 {
23731   return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
23732 }
23733 
23734 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_n_u16(uint16x4_t __a,const int __b)23735 vshll_n_u16 (uint16x4_t __a, const int __b)
23736 {
23737   return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
23738 }
23739 
23740 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_n_u32(uint32x2_t __a,const int __b)23741 vshll_n_u32 (uint32x2_t __a, const int __b)
23742 {
23743   return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
23744 }
23745 
23746 /* vshr */
23747 
23748 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshr_n_s8(int8x8_t __a,const int __b)23749 vshr_n_s8 (int8x8_t __a, const int __b)
23750 {
23751   return (int8x8_t) __builtin_aarch64_sshr_nv8qi (__a, __b);
23752 }
23753 
23754 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshr_n_s16(int16x4_t __a,const int __b)23755 vshr_n_s16 (int16x4_t __a, const int __b)
23756 {
23757   return (int16x4_t) __builtin_aarch64_sshr_nv4hi (__a, __b);
23758 }
23759 
23760 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshr_n_s32(int32x2_t __a,const int __b)23761 vshr_n_s32 (int32x2_t __a, const int __b)
23762 {
23763   return (int32x2_t) __builtin_aarch64_sshr_nv2si (__a, __b);
23764 }
23765 
23766 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshr_n_s64(int64x1_t __a,const int __b)23767 vshr_n_s64 (int64x1_t __a, const int __b)
23768 {
23769   return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b);
23770 }
23771 
23772 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshr_n_u8(uint8x8_t __a,const int __b)23773 vshr_n_u8 (uint8x8_t __a, const int __b)
23774 {
23775   return (uint8x8_t) __builtin_aarch64_ushr_nv8qi ((int8x8_t) __a, __b);
23776 }
23777 
23778 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshr_n_u16(uint16x4_t __a,const int __b)23779 vshr_n_u16 (uint16x4_t __a, const int __b)
23780 {
23781   return (uint16x4_t) __builtin_aarch64_ushr_nv4hi ((int16x4_t) __a, __b);
23782 }
23783 
23784 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshr_n_u32(uint32x2_t __a,const int __b)23785 vshr_n_u32 (uint32x2_t __a, const int __b)
23786 {
23787   return (uint32x2_t) __builtin_aarch64_ushr_nv2si ((int32x2_t) __a, __b);
23788 }
23789 
23790 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshr_n_u64(uint64x1_t __a,const int __b)23791 vshr_n_u64 (uint64x1_t __a, const int __b)
23792 {
23793   return (uint64x1_t) __builtin_aarch64_ushr_ndi ((int64x1_t) __a, __b);
23794 }
23795 
23796 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshrq_n_s8(int8x16_t __a,const int __b)23797 vshrq_n_s8 (int8x16_t __a, const int __b)
23798 {
23799   return (int8x16_t) __builtin_aarch64_sshr_nv16qi (__a, __b);
23800 }
23801 
23802 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshrq_n_s16(int16x8_t __a,const int __b)23803 vshrq_n_s16 (int16x8_t __a, const int __b)
23804 {
23805   return (int16x8_t) __builtin_aarch64_sshr_nv8hi (__a, __b);
23806 }
23807 
23808 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshrq_n_s32(int32x4_t __a,const int __b)23809 vshrq_n_s32 (int32x4_t __a, const int __b)
23810 {
23811   return (int32x4_t) __builtin_aarch64_sshr_nv4si (__a, __b);
23812 }
23813 
23814 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshrq_n_s64(int64x2_t __a,const int __b)23815 vshrq_n_s64 (int64x2_t __a, const int __b)
23816 {
23817   return (int64x2_t) __builtin_aarch64_sshr_nv2di (__a, __b);
23818 }
23819 
23820 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshrq_n_u8(uint8x16_t __a,const int __b)23821 vshrq_n_u8 (uint8x16_t __a, const int __b)
23822 {
23823   return (uint8x16_t) __builtin_aarch64_ushr_nv16qi ((int8x16_t) __a, __b);
23824 }
23825 
23826 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshrq_n_u16(uint16x8_t __a,const int __b)23827 vshrq_n_u16 (uint16x8_t __a, const int __b)
23828 {
23829   return (uint16x8_t) __builtin_aarch64_ushr_nv8hi ((int16x8_t) __a, __b);
23830 }
23831 
23832 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshrq_n_u32(uint32x4_t __a,const int __b)23833 vshrq_n_u32 (uint32x4_t __a, const int __b)
23834 {
23835   return (uint32x4_t) __builtin_aarch64_ushr_nv4si ((int32x4_t) __a, __b);
23836 }
23837 
23838 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshrq_n_u64(uint64x2_t __a,const int __b)23839 vshrq_n_u64 (uint64x2_t __a, const int __b)
23840 {
23841   return (uint64x2_t) __builtin_aarch64_ushr_nv2di ((int64x2_t) __a, __b);
23842 }
23843 
23844 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshrd_n_s64(int64x1_t __a,const int __b)23845 vshrd_n_s64 (int64x1_t __a, const int __b)
23846 {
23847   return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b);
23848 }
23849 
23850 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshrd_n_u64(uint64x1_t __a,const int __b)23851 vshrd_n_u64 (uint64x1_t __a, const int __b)
23852 {
23853   return (uint64x1_t) __builtin_aarch64_ushr_ndi (__a, __b);
23854 }
23855 
23856 /* vsli */
23857 
23858 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsli_n_s8(int8x8_t __a,int8x8_t __b,const int __c)23859 vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23860 {
23861   return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
23862 }
23863 
23864 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsli_n_s16(int16x4_t __a,int16x4_t __b,const int __c)23865 vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23866 {
23867   return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
23868 }
23869 
23870 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsli_n_s32(int32x2_t __a,int32x2_t __b,const int __c)23871 vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23872 {
23873   return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
23874 }
23875 
23876 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsli_n_s64(int64x1_t __a,int64x1_t __b,const int __c)23877 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23878 {
23879   return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23880 }
23881 
23882 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsli_n_u8(uint8x8_t __a,uint8x8_t __b,const int __c)23883 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23884 {
23885   return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a,
23886 						   (int8x8_t) __b, __c);
23887 }
23888 
23889 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsli_n_u16(uint16x4_t __a,uint16x4_t __b,const int __c)23890 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23891 {
23892   return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a,
23893 						    (int16x4_t) __b, __c);
23894 }
23895 
23896 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsli_n_u32(uint32x2_t __a,uint32x2_t __b,const int __c)23897 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23898 {
23899   return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a,
23900 						    (int32x2_t) __b, __c);
23901 }
23902 
23903 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsli_n_u64(uint64x1_t __a,uint64x1_t __b,const int __c)23904 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23905 {
23906   return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a,
23907 						  (int64x1_t) __b, __c);
23908 }
23909 
23910 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsliq_n_s8(int8x16_t __a,int8x16_t __b,const int __c)23911 vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23912 {
23913   return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
23914 }
23915 
23916 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsliq_n_s16(int16x8_t __a,int16x8_t __b,const int __c)23917 vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23918 {
23919   return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
23920 }
23921 
23922 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsliq_n_s32(int32x4_t __a,int32x4_t __b,const int __c)23923 vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23924 {
23925   return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
23926 }
23927 
23928 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsliq_n_s64(int64x2_t __a,int64x2_t __b,const int __c)23929 vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23930 {
23931   return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
23932 }
23933 
23934 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsliq_n_u8(uint8x16_t __a,uint8x16_t __b,const int __c)23935 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23936 {
23937   return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a,
23938 						     (int8x16_t) __b, __c);
23939 }
23940 
23941 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsliq_n_u16(uint16x8_t __a,uint16x8_t __b,const int __c)23942 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23943 {
23944   return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a,
23945 						    (int16x8_t) __b, __c);
23946 }
23947 
23948 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsliq_n_u32(uint32x4_t __a,uint32x4_t __b,const int __c)23949 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23950 {
23951   return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a,
23952 						    (int32x4_t) __b, __c);
23953 }
23954 
23955 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsliq_n_u64(uint64x2_t __a,uint64x2_t __b,const int __c)23956 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23957 {
23958   return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a,
23959 						    (int64x2_t) __b, __c);
23960 }
23961 
23962 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vslid_n_s64(int64x1_t __a,int64x1_t __b,const int __c)23963 vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23964 {
23965   return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23966 }
23967 
23968 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vslid_n_u64(uint64x1_t __a,uint64x1_t __b,const int __c)23969 vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23970 {
23971   return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c);
23972 }
23973 
/* vsqadd */

23976 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsqadd_u8(uint8x8_t __a,int8x8_t __b)23977 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
23978 {
23979   return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a,
23980 						   (int8x8_t) __b);
23981 }
23982 
23983 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsqadd_u16(uint16x4_t __a,int16x4_t __b)23984 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
23985 {
23986   return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a,
23987 						    (int16x4_t) __b);
23988 }
23989 
23990 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsqadd_u32(uint32x2_t __a,int32x2_t __b)23991 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
23992 {
23993   return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a,
23994 						    (int32x2_t) __b);
23995 }
23996 
23997 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsqadd_u64(uint64x1_t __a,int64x1_t __b)23998 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
23999 {
24000   return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
24001 }
24002 
24003 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsqaddq_u8(uint8x16_t __a,int8x16_t __b)24004 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
24005 {
24006   return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a,
24007 						     (int8x16_t) __b);
24008 }
24009 
24010 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsqaddq_u16(uint16x8_t __a,int16x8_t __b)24011 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
24012 {
24013   return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a,
24014 						    (int16x8_t) __b);
24015 }
24016 
24017 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsqaddq_u32(uint32x4_t __a,int32x4_t __b)24018 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
24019 {
24020   return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a,
24021 						    (int32x4_t) __b);
24022 }
24023 
24024 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsqaddq_u64(uint64x2_t __a,int64x2_t __b)24025 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
24026 {
24027   return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a,
24028 						    (int64x2_t) __b);
24029 }
24030 
24031 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vsqaddb_u8(uint8x1_t __a,int8x1_t __b)24032 vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
24033 {
24034   return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b);
24035 }
24036 
24037 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vsqaddh_u16(uint16x1_t __a,int16x1_t __b)24038 vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
24039 {
24040   return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b);
24041 }
24042 
24043 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vsqadds_u32(uint32x1_t __a,int32x1_t __b)24044 vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
24045 {
24046   return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b);
24047 }
24048 
24049 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsqaddd_u64(uint64x1_t __a,int64x1_t __b)24050 vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
24051 {
24052   return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
24053 }
24054 
/* vsqrt */
24056 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vsqrt_f32(float32x2_t a)24057 vsqrt_f32 (float32x2_t a)
24058 {
24059   return __builtin_aarch64_sqrtv2sf (a);
24060 }
24061 
24062 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsqrtq_f32(float32x4_t a)24063 vsqrtq_f32 (float32x4_t a)
24064 {
24065   return __builtin_aarch64_sqrtv4sf (a);
24066 }
24067 
24068 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vsqrtq_f64(float64x2_t a)24069 vsqrtq_f64 (float64x2_t a)
24070 {
24071   return __builtin_aarch64_sqrtv2df (a);
24072 }
24073 
/* vsra */

24076 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsra_n_s8(int8x8_t __a,int8x8_t __b,const int __c)24077 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
24078 {
24079   return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
24080 }
24081 
24082 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsra_n_s16(int16x4_t __a,int16x4_t __b,const int __c)24083 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
24084 {
24085   return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
24086 }
24087 
24088 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsra_n_s32(int32x2_t __a,int32x2_t __b,const int __c)24089 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
24090 {
24091   return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
24092 }
24093 
24094 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsra_n_s64(int64x1_t __a,int64x1_t __b,const int __c)24095 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24096 {
24097   return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
24098 }
24099 
24100 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsra_n_u8(uint8x8_t __a,uint8x8_t __b,const int __c)24101 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
24102 {
24103   return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
24104 						   (int8x8_t) __b, __c);
24105 }
24106 
24107 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsra_n_u16(uint16x4_t __a,uint16x4_t __b,const int __c)24108 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
24109 {
24110   return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
24111 						    (int16x4_t) __b, __c);
24112 }
24113 
24114 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsra_n_u32(uint32x2_t __a,uint32x2_t __b,const int __c)24115 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
24116 {
24117   return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
24118 						    (int32x2_t) __b, __c);
24119 }
24120 
24121 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsra_n_u64(uint64x1_t __a,uint64x1_t __b,const int __c)24122 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24123 {
24124   return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
24125 						  (int64x1_t) __b, __c);
24126 }
24127 
24128 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsraq_n_s8(int8x16_t __a,int8x16_t __b,const int __c)24129 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
24130 {
24131   return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
24132 }
24133 
24134 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsraq_n_s16(int16x8_t __a,int16x8_t __b,const int __c)24135 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
24136 {
24137   return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
24138 }
24139 
24140 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsraq_n_s32(int32x4_t __a,int32x4_t __b,const int __c)24141 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
24142 {
24143   return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
24144 }
24145 
24146 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsraq_n_s64(int64x2_t __a,int64x2_t __b,const int __c)24147 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
24148 {
24149   return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
24150 }
24151 
24152 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsraq_n_u8(uint8x16_t __a,uint8x16_t __b,const int __c)24153 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
24154 {
24155   return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
24156 						     (int8x16_t) __b, __c);
24157 }
24158 
24159 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsraq_n_u16(uint16x8_t __a,uint16x8_t __b,const int __c)24160 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
24161 {
24162   return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
24163 						    (int16x8_t) __b, __c);
24164 }
24165 
24166 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsraq_n_u32(uint32x4_t __a,uint32x4_t __b,const int __c)24167 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
24168 {
24169   return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
24170 						    (int32x4_t) __b, __c);
24171 }
24172 
24173 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsraq_n_u64(uint64x2_t __a,uint64x2_t __b,const int __c)24174 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
24175 {
24176   return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
24177 						    (int64x2_t) __b, __c);
24178 }
24179 
24180 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsrad_n_s64(int64x1_t __a,int64x1_t __b,const int __c)24181 vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24182 {
24183   return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
24184 }
24185 
24186 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsrad_n_u64(uint64x1_t __a,uint64x1_t __b,const int __c)24187 vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24188 {
24189   return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
24190 }
24191 
/* vsri */

24194 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsri_n_s8(int8x8_t __a,int8x8_t __b,const int __c)24195 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
24196 {
24197   return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
24198 }
24199 
24200 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsri_n_s16(int16x4_t __a,int16x4_t __b,const int __c)24201 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
24202 {
24203   return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
24204 }
24205 
24206 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsri_n_s32(int32x2_t __a,int32x2_t __b,const int __c)24207 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
24208 {
24209   return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
24210 }
24211 
24212 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsri_n_s64(int64x1_t __a,int64x1_t __b,const int __c)24213 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24214 {
24215   return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
24216 }
24217 
24218 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsri_n_u8(uint8x8_t __a,uint8x8_t __b,const int __c)24219 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
24220 {
24221   return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
24222 						   (int8x8_t) __b, __c);
24223 }
24224 
24225 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsri_n_u16(uint16x4_t __a,uint16x4_t __b,const int __c)24226 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
24227 {
24228   return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
24229 						    (int16x4_t) __b, __c);
24230 }
24231 
24232 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsri_n_u32(uint32x2_t __a,uint32x2_t __b,const int __c)24233 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
24234 {
24235   return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
24236 						    (int32x2_t) __b, __c);
24237 }
24238 
24239 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsri_n_u64(uint64x1_t __a,uint64x1_t __b,const int __c)24240 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24241 {
24242   return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
24243 						  (int64x1_t) __b, __c);
24244 }
24245 
24246 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsriq_n_s8(int8x16_t __a,int8x16_t __b,const int __c)24247 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
24248 {
24249   return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
24250 }
24251 
24252 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsriq_n_s16(int16x8_t __a,int16x8_t __b,const int __c)24253 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
24254 {
24255   return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
24256 }
24257 
24258 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsriq_n_s32(int32x4_t __a,int32x4_t __b,const int __c)24259 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
24260 {
24261   return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
24262 }
24263 
24264 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsriq_n_s64(int64x2_t __a,int64x2_t __b,const int __c)24265 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
24266 {
24267   return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
24268 }
24269 
24270 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsriq_n_u8(uint8x16_t __a,uint8x16_t __b,const int __c)24271 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
24272 {
24273   return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
24274 						     (int8x16_t) __b, __c);
24275 }
24276 
24277 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsriq_n_u16(uint16x8_t __a,uint16x8_t __b,const int __c)24278 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
24279 {
24280   return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
24281 						    (int16x8_t) __b, __c);
24282 }
24283 
24284 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsriq_n_u32(uint32x4_t __a,uint32x4_t __b,const int __c)24285 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
24286 {
24287   return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
24288 						    (int32x4_t) __b, __c);
24289 }
24290 
24291 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsriq_n_u64(uint64x2_t __a,uint64x2_t __b,const int __c)24292 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
24293 {
24294   return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
24295 						    (int64x2_t) __b, __c);
24296 }
24297 
24298 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsrid_n_s64(int64x1_t __a,int64x1_t __b,const int __c)24299 vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24300 {
24301   return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
24302 }
24303 
24304 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsrid_n_u64(uint64x1_t __a,uint64x1_t __b,const int __c)24305 vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24306 {
24307   return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
24308 }
24309 
/* vstn */

24312 __extension__ static __inline void
vst2_s64(int64_t * __a,int64x1x2_t val)24313 vst2_s64 (int64_t * __a, int64x1x2_t val)
24314 {
24315   __builtin_aarch64_simd_oi __o;
24316   int64x2x2_t temp;
24317   temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0)));
24318   temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0)));
24319   __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24320   __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24321   __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24322 }
24323 
24324 __extension__ static __inline void
vst2_u64(uint64_t * __a,uint64x1x2_t val)24325 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
24326 {
24327   __builtin_aarch64_simd_oi __o;
24328   uint64x2x2_t temp;
24329   temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0)));
24330   temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0)));
24331   __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24332   __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24333   __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24334 }
24335 
24336 __extension__ static __inline void
vst2_f64(float64_t * __a,float64x1x2_t val)24337 vst2_f64 (float64_t * __a, float64x1x2_t val)
24338 {
24339   __builtin_aarch64_simd_oi __o;
24340   float64x2x2_t temp;
24341   temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0)));
24342   temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0)));
24343   __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
24344   __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
24345   __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
24346 }
24347 
24348 __extension__ static __inline void
vst2_s8(int8_t * __a,int8x8x2_t val)24349 vst2_s8 (int8_t * __a, int8x8x2_t val)
24350 {
24351   __builtin_aarch64_simd_oi __o;
24352   int8x16x2_t temp;
24353   temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0)));
24354   temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0)));
24355   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24356   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24357   __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24358 }
24359 
24360 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2_p8(poly8_t * __a,poly8x8x2_t val)24361 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
24362 {
24363   __builtin_aarch64_simd_oi __o;
24364   poly8x16x2_t temp;
24365   temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0)));
24366   temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0)));
24367   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24368   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24369   __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24370 }
24371 
24372 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2_s16(int16_t * __a,int16x4x2_t val)24373 vst2_s16 (int16_t * __a, int16x4x2_t val)
24374 {
24375   __builtin_aarch64_simd_oi __o;
24376   int16x8x2_t temp;
24377   temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0)));
24378   temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0)));
24379   __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24380   __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24381   __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24382 }
24383 
24384 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2_p16(poly16_t * __a,poly16x4x2_t val)24385 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
24386 {
24387   __builtin_aarch64_simd_oi __o;
24388   poly16x8x2_t temp;
24389   temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0)));
24390   temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0)));
24391   __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24392   __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24393   __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24394 }
24395 
24396 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2_s32(int32_t * __a,int32x2x2_t val)24397 vst2_s32 (int32_t * __a, int32x2x2_t val)
24398 {
24399   __builtin_aarch64_simd_oi __o;
24400   int32x4x2_t temp;
24401   temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0)));
24402   temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0)));
24403   __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24404   __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24405   __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24406 }
24407 
24408 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2_u8(uint8_t * __a,uint8x8x2_t val)24409 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
24410 {
24411   __builtin_aarch64_simd_oi __o;
24412   uint8x16x2_t temp;
24413   temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0)));
24414   temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0)));
24415   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24416   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24417   __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24418 }
24419 
24420 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2_u16(uint16_t * __a,uint16x4x2_t val)24421 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
24422 {
24423   __builtin_aarch64_simd_oi __o;
24424   uint16x8x2_t temp;
24425   temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0)));
24426   temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0)));
24427   __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24428   __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24429   __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24430 }
24431 
24432 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2_u32(uint32_t * __a,uint32x2x2_t val)24433 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
24434 {
24435   __builtin_aarch64_simd_oi __o;
24436   uint32x4x2_t temp;
24437   temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0)));
24438   temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0)));
24439   __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24440   __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24441   __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24442 }
24443 
24444 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f32(float32_t * __a,float32x2x2_t val)24445 vst2_f32 (float32_t * __a, float32x2x2_t val)
24446 {
24447   __builtin_aarch64_simd_oi __o;
24448   float32x4x2_t temp;
24449   temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0)));
24450   temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0)));
24451   __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
24452   __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
24453   __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24454 }
24455 
24456 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_s8(int8_t * __a,int8x16x2_t val)24457 vst2q_s8 (int8_t * __a, int8x16x2_t val)
24458 {
24459   __builtin_aarch64_simd_oi __o;
24460   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24461   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24462   __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24463 }
24464 
24465 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_p8(poly8_t * __a,poly8x16x2_t val)24466 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
24467 {
24468   __builtin_aarch64_simd_oi __o;
24469   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24470   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24471   __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24472 }
24473 
24474 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_s16(int16_t * __a,int16x8x2_t val)24475 vst2q_s16 (int16_t * __a, int16x8x2_t val)
24476 {
24477   __builtin_aarch64_simd_oi __o;
24478   __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24479   __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24480   __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24481 }
24482 
24483 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_p16(poly16_t * __a,poly16x8x2_t val)24484 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
24485 {
24486   __builtin_aarch64_simd_oi __o;
24487   __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24488   __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24489   __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24490 }
24491 
24492 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_s32(int32_t * __a,int32x4x2_t val)24493 vst2q_s32 (int32_t * __a, int32x4x2_t val)
24494 {
24495   __builtin_aarch64_simd_oi __o;
24496   __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24497   __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24498   __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24499 }
24500 
24501 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_s64(int64_t * __a,int64x2x2_t val)24502 vst2q_s64 (int64_t * __a, int64x2x2_t val)
24503 {
24504   __builtin_aarch64_simd_oi __o;
24505   __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24506   __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24507   __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24508 }
24509 
24510 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_u8(uint8_t * __a,uint8x16x2_t val)24511 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
24512 {
24513   __builtin_aarch64_simd_oi __o;
24514   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24515   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24516   __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24517 }
24518 
24519 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_u16(uint16_t * __a,uint16x8x2_t val)24520 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
24521 {
24522   __builtin_aarch64_simd_oi __o;
24523   __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24524   __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24525   __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24526 }
24527 
24528 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_u32(uint32_t * __a,uint32x4x2_t val)24529 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
24530 {
24531   __builtin_aarch64_simd_oi __o;
24532   __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24533   __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24534   __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24535 }
24536 
24537 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_u64(uint64_t * __a,uint64x2x2_t val)24538 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
24539 {
24540   __builtin_aarch64_simd_oi __o;
24541   __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24542   __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24543   __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24544 }
24545 
24546 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f32(float32_t * __a,float32x4x2_t val)24547 vst2q_f32 (float32_t * __a, float32x4x2_t val)
24548 {
24549   __builtin_aarch64_simd_oi __o;
24550   __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
24551   __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
24552   __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24553 }
24554 
24555 __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f64(float64_t * __a,float64x2x2_t val)24556 vst2q_f64 (float64_t * __a, float64x2x2_t val)
24557 {
24558   __builtin_aarch64_simd_oi __o;
24559   __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
24560   __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
24561   __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
24562 }
24563 
24564 __extension__ static __inline void
vst3_s64(int64_t * __a,int64x1x3_t val)24565 vst3_s64 (int64_t * __a, int64x1x3_t val)
24566 {
24567   __builtin_aarch64_simd_ci __o;
24568   int64x2x3_t temp;
24569   temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0)));
24570   temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0)));
24571   temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0)));
24572   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24573   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24574   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24575   __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24576 }
24577 
24578 __extension__ static __inline void
vst3_u64(uint64_t * __a,uint64x1x3_t val)24579 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
24580 {
24581   __builtin_aarch64_simd_ci __o;
24582   uint64x2x3_t temp;
24583   temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0)));
24584   temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0)));
24585   temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0)));
24586   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24587   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24588   __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24589   __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24590 }
24591 
24592 __extension__ static __inline void
vst3_f64(float64_t * __a,float64x1x3_t val)24593 vst3_f64 (float64_t * __a, float64x1x3_t val)
24594 {
24595   __builtin_aarch64_simd_ci __o;
24596   float64x2x3_t temp;
24597   temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0)));
24598   temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0)));
24599   temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0)));
24600   __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
24601   __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
24602   __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
24603   __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
24604 }
24605 
24606 __extension__ static __inline void
vst3_s8(int8_t * __a,int8x8x3_t val)24607 vst3_s8 (int8_t * __a, int8x8x3_t val)
24608 {
24609   __builtin_aarch64_simd_ci __o;
24610   int8x16x3_t temp;
24611   temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0)));
24612   temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0)));
24613   temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0)));
24614   __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24615   __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24616   __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24617   __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24618 }
24619 
/* Store three poly8x8_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_p8 (poly8_t * __a, poly8x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  poly8x16x3_t temp;
  /* Zero-extend each D-register argument into a Q register for the CI
     tuple-set builtins; the V8QI store builtin stores the low halves.  */
  temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0)));
  temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0)));
  temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
  __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
24633 
/* Store three int16x4_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_s16 (int16_t * __a, int16x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  int16x8x3_t temp;
  /* Zero-extend each D-register argument into a Q register for the CI
     tuple-set builtins; the V4HI store builtin stores the low halves.  */
  temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0)));
  temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0)));
  temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0)));
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
  __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
24647 
/* Store three poly16x4_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_p16 (poly16_t * __a, poly16x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  poly16x8x3_t temp;
  /* Zero-extend each D-register argument into a Q register for the CI
     tuple-set builtins; the V4HI store builtin stores the low halves.  */
  temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0)));
  temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0)));
  temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
  __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
24661 
/* Store three int32x2_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_s32 (int32_t * __a, int32x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  int32x4x3_t temp;
  /* Zero-extend each D-register argument into a Q register for the CI
     tuple-set builtins; the V2SI store builtin stores the low halves.  */
  temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0)));
  temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0)));
  temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0)));
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
  __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
24675 
/* Store three uint8x8_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_u8 (uint8_t * __a, uint8x8x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  uint8x16x3_t temp;
  /* Zero-extend each D-register argument into a Q register for the CI
     tuple-set builtins; the V8QI store builtin stores the low halves.  */
  temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0)));
  temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0)));
  temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
  __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
24689 
/* Store three uint16x4_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_u16 (uint16_t * __a, uint16x4x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  uint16x8x3_t temp;
  /* Zero-extend each D-register argument into a Q register for the CI
     tuple-set builtins; the V4HI store builtin stores the low halves.  */
  temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0)));
  temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0)));
  temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
  __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
24703 
/* Store three uint32x2_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_u32 (uint32_t * __a, uint32x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  uint32x4x3_t temp;
  /* Zero-extend each D-register argument into a Q register for the CI
     tuple-set builtins; the V2SI store builtin stores the low halves.  */
  temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0)));
  temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0)));
  temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
  __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
24717 
/* Store three float32x2_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f32 (float32_t * __a, float32x2x3_t val)
{
  __builtin_aarch64_simd_ci __o;
  float32x4x3_t temp;
  /* Zero-extend each D-register argument into a Q register for the CI
     tuple-set builtins; the V2SF store builtin stores the low halves.  */
  temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0)));
  temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0)));
  temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0)));
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
  __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
24731 
/* Store three int8x16_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s8 (int8_t * __a, int8x16x3_t val)
{
  /* Collect the three Q registers into a CI tuple, then store.  */
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
24741 
/* Store three poly8x16_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
{
  /* Collect the three Q registers into a CI tuple, then store.  */
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
24751 
/* Store three int16x8_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s16 (int16_t * __a, int16x8x3_t val)
{
  /* Collect the three Q registers into a CI tuple, then store.  */
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
24761 
/* Store three poly16x8_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
{
  /* Collect the three Q registers into a CI tuple, then store.  */
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
24771 
/* Store three int32x4_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s32 (int32_t * __a, int32x4x3_t val)
{
  /* Collect the three Q registers into a CI tuple, then store.  */
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
  __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
24781 
/* Store three int64x2_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s64 (int64_t * __a, int64x2x3_t val)
{
  /* Collect the three Q registers into a CI tuple, then store.  */
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
  __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
24791 
/* Store three uint8x16_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
{
  /* Collect the three Q registers into a CI tuple, then store.  */
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
24801 
/* Store three uint16x8_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
{
  /* Collect the three Q registers into a CI tuple, then store.  */
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
24811 
/* Store three uint32x4_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
{
  /* Collect the three Q registers into a CI tuple, then store.  */
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
  __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
24821 
/* Store three uint64x2_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
{
  /* Collect the three Q registers into a CI tuple, then store.  */
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
  __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
24831 
/* Store three float32x4_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f32 (float32_t * __a, float32x4x3_t val)
{
  /* Collect the three Q registers into a CI tuple, then store.  */
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
  __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
24841 
/* Store three float64x2_t vectors to __a via the three-structure (ST3) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f64 (float64_t * __a, float64x2x3_t val)
{
  /* Collect the three Q registers into a CI tuple, then store.  */
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
  __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
24851 
24852 __extension__ static __inline void
vst4_s64(int64_t * __a,int64x1x4_t val)24853 vst4_s64 (int64_t * __a, int64x1x4_t val)
24854 {
24855   __builtin_aarch64_simd_xi __o;
24856   int64x2x4_t temp;
24857   temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0)));
24858   temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0)));
24859   temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0)));
24860   temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (INT64_C (0)));
24861   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24862   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24863   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24864   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24865   __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24866 }
24867 
24868 __extension__ static __inline void
vst4_u64(uint64_t * __a,uint64x1x4_t val)24869 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
24870 {
24871   __builtin_aarch64_simd_xi __o;
24872   uint64x2x4_t temp;
24873   temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0)));
24874   temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0)));
24875   temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0)));
24876   temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (UINT64_C (0)));
24877   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24878   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24879   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24880   __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24881   __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24882 }
24883 
24884 __extension__ static __inline void
vst4_f64(float64_t * __a,float64x1x4_t val)24885 vst4_f64 (float64_t * __a, float64x1x4_t val)
24886 {
24887   __builtin_aarch64_simd_xi __o;
24888   float64x2x4_t temp;
24889   temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0)));
24890   temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0)));
24891   temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0)));
24892   temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (UINT64_C (0)));
24893   __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
24894   __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
24895   __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
24896   __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
24897   __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
24898 }
24899 
24900 __extension__ static __inline void
vst4_s8(int8_t * __a,int8x8x4_t val)24901 vst4_s8 (int8_t * __a, int8x8x4_t val)
24902 {
24903   __builtin_aarch64_simd_xi __o;
24904   int8x16x4_t temp;
24905   temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0)));
24906   temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0)));
24907   temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0)));
24908   temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (INT64_C (0)));
24909   __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24910   __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24911   __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24912   __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24913   __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24914 }
24915 
/* Store four poly8x8_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_p8 (poly8_t * __a, poly8x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  poly8x16x4_t temp;
  /* Zero-extend each D-register argument into a Q register for the XI
     tuple-set builtins; the V8QI store builtin stores the low halves.  */
  temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0)));
  temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0)));
  temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0)));
  temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
  __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
24931 
/* Store four int16x4_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_s16 (int16_t * __a, int16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  int16x8x4_t temp;
  /* Zero-extend each D-register argument into a Q register for the XI
     tuple-set builtins; the V4HI store builtin stores the low halves.  */
  temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0)));
  temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0)));
  temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0)));
  temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (INT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
24947 
/* Store four poly16x4_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_p16 (poly16_t * __a, poly16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  poly16x8x4_t temp;
  /* Zero-extend each D-register argument into a Q register for the XI
     tuple-set builtins; the V4HI store builtin stores the low halves.  */
  temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0)));
  temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0)));
  temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0)));
  temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
24963 
/* Store four int32x2_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_s32 (int32_t * __a, int32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  int32x4x4_t temp;
  /* Zero-extend each D-register argument into a Q register for the XI
     tuple-set builtins; the V2SI store builtin stores the low halves.  */
  temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0)));
  temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0)));
  temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0)));
  temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (INT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
24979 
/* Store four uint8x8_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u8 (uint8_t * __a, uint8x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint8x16x4_t temp;
  /* Zero-extend each D-register argument into a Q register for the XI
     tuple-set builtins; the V8QI store builtin stores the low halves.  */
  temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0)));
  temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0)));
  temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0)));
  temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
  __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
24995 
/* Store four uint16x4_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u16 (uint16_t * __a, uint16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint16x8x4_t temp;
  /* Zero-extend each D-register argument into a Q register for the XI
     tuple-set builtins; the V4HI store builtin stores the low halves.  */
  temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0)));
  temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0)));
  temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0)));
  temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
25011 
/* Store four uint32x2_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u32 (uint32_t * __a, uint32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint32x4x4_t temp;
  /* Zero-extend each D-register argument into a Q register for the XI
     tuple-set builtins; the V2SI store builtin stores the low halves.  */
  temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0)));
  temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0)));
  temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0)));
  temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
25027 
/* Store four float32x2_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  float32x4x4_t temp;
  /* Zero-extend each D-register argument into a Q register for the XI
     tuple-set builtins; the V2SF store builtin stores the low halves.  */
  temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0)));
  temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0)));
  temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0)));
  temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
25043 
/* Store four int8x16_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s8 (int8_t * __a, int8x16x4_t val)
{
  /* Collect the four Q registers into an XI tuple, then store.  */
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
25054 
/* Store four poly8x16_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
{
  /* Collect the four Q registers into an XI tuple, then store.  */
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
25065 
/* Store four int16x8_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s16 (int16_t * __a, int16x8x4_t val)
{
  /* Collect the four Q registers into an XI tuple, then store.  */
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
25076 
/* Store four poly16x8_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
{
  /* Collect the four Q registers into an XI tuple, then store.  */
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
25087 
/* Store four int32x4_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s32 (int32_t * __a, int32x4x4_t val)
{
  /* Collect the four Q registers into an XI tuple, then store.  */
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
25098 
/* Store four int64x2_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s64 (int64_t * __a, int64x2x4_t val)
{
  /* Collect the four Q registers into an XI tuple, then store.  */
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
25109 
/* Store four uint8x16_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
{
  /* Collect the four Q registers into an XI tuple, then store.  */
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}
25120 
/* Store four uint16x8_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
{
  /* Collect the four Q registers into an XI tuple, then store.  */
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
25131 
/* Store four uint32x4_t vectors to __a via the four-structure (ST4) store.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
{
  /* Collect the four Q registers into an XI tuple, then store.  */
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}
25142 
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
{
  /* Store four interleaved uint64x2 vectors to __a (single ST4 store).
     __o is filled slot by slot by the set_qreg builtins.  */
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
25153 
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t val)
{
  /* Store four interleaved float32x4 vectors to __a (single ST4 store).
     __o is filled slot by slot by the set_qreg builtins.  */
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
25164 
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f64 (float64_t * __a, float64x2x4_t val)
{
  /* Store four interleaved float64x2 vectors to __a (single ST4 store).
     __o is filled slot by slot by the set_qreg builtins.  */
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
25175 
25176 /* vsub */
25177 
25178 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsubd_s64(int64x1_t __a,int64x1_t __b)25179 vsubd_s64 (int64x1_t __a, int64x1_t __b)
25180 {
25181   return __a - __b;
25182 }
25183 
25184 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsubd_u64(uint64x1_t __a,uint64x1_t __b)25185 vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
25186 {
25187   return __a - __b;
25188 }
25189 
25190 /* vtrn */
25191 
25192 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vtrn_f32(float32x2_t a,float32x2_t b)25193 vtrn_f32 (float32x2_t a, float32x2_t b)
25194 {
25195   return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
25196 }
25197 
25198 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vtrn_p8(poly8x8_t a,poly8x8_t b)25199 vtrn_p8 (poly8x8_t a, poly8x8_t b)
25200 {
25201   return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
25202 }
25203 
25204 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vtrn_p16(poly16x4_t a,poly16x4_t b)25205 vtrn_p16 (poly16x4_t a, poly16x4_t b)
25206 {
25207   return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
25208 }
25209 
25210 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vtrn_s8(int8x8_t a,int8x8_t b)25211 vtrn_s8 (int8x8_t a, int8x8_t b)
25212 {
25213   return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
25214 }
25215 
25216 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vtrn_s16(int16x4_t a,int16x4_t b)25217 vtrn_s16 (int16x4_t a, int16x4_t b)
25218 {
25219   return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
25220 }
25221 
25222 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vtrn_s32(int32x2_t a,int32x2_t b)25223 vtrn_s32 (int32x2_t a, int32x2_t b)
25224 {
25225   return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
25226 }
25227 
25228 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vtrn_u8(uint8x8_t a,uint8x8_t b)25229 vtrn_u8 (uint8x8_t a, uint8x8_t b)
25230 {
25231   return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
25232 }
25233 
25234 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vtrn_u16(uint16x4_t a,uint16x4_t b)25235 vtrn_u16 (uint16x4_t a, uint16x4_t b)
25236 {
25237   return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
25238 }
25239 
25240 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vtrn_u32(uint32x2_t a,uint32x2_t b)25241 vtrn_u32 (uint32x2_t a, uint32x2_t b)
25242 {
25243   return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
25244 }
25245 
25246 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vtrnq_f32(float32x4_t a,float32x4_t b)25247 vtrnq_f32 (float32x4_t a, float32x4_t b)
25248 {
25249   return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
25250 }
25251 
25252 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vtrnq_p8(poly8x16_t a,poly8x16_t b)25253 vtrnq_p8 (poly8x16_t a, poly8x16_t b)
25254 {
25255   return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
25256 }
25257 
25258 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vtrnq_p16(poly16x8_t a,poly16x8_t b)25259 vtrnq_p16 (poly16x8_t a, poly16x8_t b)
25260 {
25261   return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
25262 }
25263 
25264 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vtrnq_s8(int8x16_t a,int8x16_t b)25265 vtrnq_s8 (int8x16_t a, int8x16_t b)
25266 {
25267   return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
25268 }
25269 
25270 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vtrnq_s16(int16x8_t a,int16x8_t b)25271 vtrnq_s16 (int16x8_t a, int16x8_t b)
25272 {
25273   return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
25274 }
25275 
25276 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vtrnq_s32(int32x4_t a,int32x4_t b)25277 vtrnq_s32 (int32x4_t a, int32x4_t b)
25278 {
25279   return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
25280 }
25281 
25282 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vtrnq_u8(uint8x16_t a,uint8x16_t b)25283 vtrnq_u8 (uint8x16_t a, uint8x16_t b)
25284 {
25285   return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
25286 }
25287 
25288 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vtrnq_u16(uint16x8_t a,uint16x8_t b)25289 vtrnq_u16 (uint16x8_t a, uint16x8_t b)
25290 {
25291   return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
25292 }
25293 
25294 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vtrnq_u32(uint32x4_t a,uint32x4_t b)25295 vtrnq_u32 (uint32x4_t a, uint32x4_t b)
25296 {
25297   return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
25298 }
25299 
25300 /* vtst */
25301 
25302 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_s8(int8x8_t __a,int8x8_t __b)25303 vtst_s8 (int8x8_t __a, int8x8_t __b)
25304 {
25305   return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
25306 }
25307 
25308 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_s16(int16x4_t __a,int16x4_t __b)25309 vtst_s16 (int16x4_t __a, int16x4_t __b)
25310 {
25311   return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
25312 }
25313 
25314 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_s32(int32x2_t __a,int32x2_t __b)25315 vtst_s32 (int32x2_t __a, int32x2_t __b)
25316 {
25317   return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
25318 }
25319 
25320 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_s64(int64x1_t __a,int64x1_t __b)25321 vtst_s64 (int64x1_t __a, int64x1_t __b)
25322 {
25323   return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b);
25324 }
25325 
25326 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_u8(uint8x8_t __a,uint8x8_t __b)25327 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
25328 {
25329   return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
25330 						 (int8x8_t) __b);
25331 }
25332 
25333 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_u16(uint16x4_t __a,uint16x4_t __b)25334 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
25335 {
25336   return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
25337 						  (int16x4_t) __b);
25338 }
25339 
25340 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_u32(uint32x2_t __a,uint32x2_t __b)25341 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
25342 {
25343   return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
25344 						  (int32x2_t) __b);
25345 }
25346 
25347 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_u64(uint64x1_t __a,uint64x1_t __b)25348 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
25349 {
25350   return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a,
25351 						(int64x1_t) __b);
25352 }
25353 
25354 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_s8(int8x16_t __a,int8x16_t __b)25355 vtstq_s8 (int8x16_t __a, int8x16_t __b)
25356 {
25357   return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
25358 }
25359 
25360 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_s16(int16x8_t __a,int16x8_t __b)25361 vtstq_s16 (int16x8_t __a, int16x8_t __b)
25362 {
25363   return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
25364 }
25365 
25366 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_s32(int32x4_t __a,int32x4_t __b)25367 vtstq_s32 (int32x4_t __a, int32x4_t __b)
25368 {
25369   return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
25370 }
25371 
25372 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_s64(int64x2_t __a,int64x2_t __b)25373 vtstq_s64 (int64x2_t __a, int64x2_t __b)
25374 {
25375   return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
25376 }
25377 
25378 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_u8(uint8x16_t __a,uint8x16_t __b)25379 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
25380 {
25381   return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
25382 						   (int8x16_t) __b);
25383 }
25384 
25385 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_u16(uint16x8_t __a,uint16x8_t __b)25386 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
25387 {
25388   return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
25389 						  (int16x8_t) __b);
25390 }
25391 
25392 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_u32(uint32x4_t __a,uint32x4_t __b)25393 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
25394 {
25395   return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
25396 						  (int32x4_t) __b);
25397 }
25398 
25399 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_u64(uint64x2_t __a,uint64x2_t __b)25400 vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
25401 {
25402   return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
25403 						  (int64x2_t) __b);
25404 }
25405 
25406 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtstd_s64(int64x1_t __a,int64x1_t __b)25407 vtstd_s64 (int64x1_t __a, int64x1_t __b)
25408 {
25409   return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b);
25410 }
25411 
25412 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtstd_u64(uint64x1_t __a,uint64x1_t __b)25413 vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
25414 {
25415   return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a,
25416 						(int64x1_t) __b);
25417 }
25418 
25419 /* vuqadd */
25420 
25421 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuqadd_s8(int8x8_t __a,uint8x8_t __b)25422 vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
25423 {
25424   return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
25425 }
25426 
25427 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuqadd_s16(int16x4_t __a,uint16x4_t __b)25428 vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
25429 {
25430   return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
25431 }
25432 
25433 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuqadd_s32(int32x2_t __a,uint32x2_t __b)25434 vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
25435 {
25436   return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
25437 }
25438 
25439 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqadd_s64(int64x1_t __a,uint64x1_t __b)25440 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
25441 {
25442   return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25443 }
25444 
25445 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuqaddq_s8(int8x16_t __a,uint8x16_t __b)25446 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
25447 {
25448   return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
25449 }
25450 
25451 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuqaddq_s16(int16x8_t __a,uint16x8_t __b)25452 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
25453 {
25454   return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
25455 }
25456 
25457 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuqaddq_s32(int32x4_t __a,uint32x4_t __b)25458 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
25459 {
25460   return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
25461 }
25462 
25463 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuqaddq_s64(int64x2_t __a,uint64x2_t __b)25464 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
25465 {
25466   return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
25467 }
25468 
25469 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vuqaddb_s8(int8x1_t __a,uint8x1_t __b)25470 vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
25471 {
25472   return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b);
25473 }
25474 
25475 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vuqaddh_s16(int16x1_t __a,uint16x1_t __b)25476 vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
25477 {
25478   return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b);
25479 }
25480 
25481 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vuqadds_s32(int32x1_t __a,uint32x1_t __b)25482 vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
25483 {
25484   return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b);
25485 }
25486 
25487 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqaddd_s64(int64x1_t __a,uint64x1_t __b)25488 vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
25489 {
25490   return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25491 }
25492 
/* Define one interleave intrinsic v<op><Q>_<funcsuffix> in terms of the
   existing v<op>1<Q>_ / v<op>2<Q>_ primitives: element [0] of the
   returned pair comes from the "1" variant, element [1] from the "2"
   variant.  Q is empty for 64-bit variants or 'q' for 128-bit ones.  */
#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) 		\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b)			\
  {									\
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b),	\
		      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)};	\
  }
25501 
/* Instantiate __DEFINTERLEAVE for every element type and width:
   first the 64-bit (D-register) variants, then the 128-bit ('q',
   Q-register) variants.  */
#define __INTERLEAVE_LIST(op)					\
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)	\
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)		\
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)		\
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)		\
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)		\
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)		\
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)		\
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)		\
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)		\
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)	\
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)		\
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)	\
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)		\
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)		\
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)		\
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)		\
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)	\
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
25521 
/* vuzp */

/* Generate vuzp_* and vuzpq_* for all element types.  */
__INTERLEAVE_LIST (uzp)

/* vzip */

/* Generate vzip_* and vzipq_* for all element types.  */
__INTERLEAVE_LIST (zip)

/* The helper macros are private to this header; remove them so they
   do not leak into user code.  */
#undef __INTERLEAVE_LIST
#undef __DEFINTERLEAVE
25532 
25533 /* End of optimal implementations in approved order.  */
25534 
25535 #endif
25536