/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O -msve-vector-bits=2048 -mlittle-endian --save-temps" } */
/* { dg-final { check-function-bodies "**" "" } } */
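
/* The vector types below are narrower than the 2048-bit SVE vectors selected
   by -msve-vector-bits, so their elements are expected to occupy wider .h,
   .s or .d containers when loaded and stored.  Each function reverses its
   input with __builtin_shuffle; the patterns check that this is implemented
   as a single predicated load, REV and store.  The result is returned in
   memory, hence the stores through x8, the AAPCS64 indirect result
   register.  */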

typedef unsigned char v128qi __attribute__((vector_size(128)));
typedef unsigned char v64qi __attribute__((vector_size(64)));
typedef unsigned char v32qi __attribute__((vector_size(32)));
typedef unsigned short v64hi __attribute__((vector_size(128)));
typedef unsigned short v32hi __attribute__((vector_size(64)));
typedef _Float16 v64hf __attribute__((vector_size(128)));
typedef _Float16 v32hf __attribute__((vector_size(64)));
typedef __bf16 v64bf __attribute__((vector_size(128)));
typedef __bf16 v32bf __attribute__((vector_size(64)));
typedef unsigned int v32si __attribute__((vector_size(128)));
typedef float v32sf __attribute__((vector_size(128)));

#define PERM0(B) B, B - 1
#define PERM1(B) PERM0 (B), PERM0 (B - 2)
#define PERM2(B) PERM1 (B), PERM1 (B - 4)
#define PERM3(B) PERM2 (B), PERM2 (B - 8)
#define PERM4(B) PERM3 (B), PERM3 (B - 16)
#define PERM5(B) PERM4 (B), PERM4 (B - 32)
#define PERM6(B) PERM5 (B), PERM5 (B - 64)
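
/* PERMn (B) expands to 2^(n+1) indices counting down from B, so PERM4 (31),
   PERM5 (63) and PERM6 (127) give the descending sequences 31..0, 63..0 and
   127..0: full reversals of 32-, 64- and 128-element vectors.  */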

/*
** qi_rev_h:
**	ptrue	(p[0-7])\.b, vl256
**	ld1b	(z[0-9]+)\.h, \1/z, \[x0\]
**	rev	(z[0-9]+)\.h, \2\.h
**	st1b	\3\.h, \1, \[x8\]
**	ret
*/
v128qi
qi_rev_h (v128qi x)
{
  return __builtin_shuffle (x, x, (v128qi) { PERM6 (127) });
}

/*
** qi_rev_s:
**	ptrue	(p[0-7])\.b, vl256
**	ld1b	(z[0-9]+)\.s, \1/z, \[x0\]
**	rev	(z[0-9]+)\.s, \2\.s
**	st1b	\3\.s, \1, \[x8\]
**	ret
*/
v64qi
qi_rev_s (v64qi x)
{
  return __builtin_shuffle (x, x, (v64qi) { PERM5 (63) });
}

/*
** qi_rev_d:
**	ptrue	(p[0-7])\.b, vl256
**	ld1b	(z[0-9]+)\.d, \1/z, \[x0\]
**	rev	(z[0-9]+)\.d, \2\.d
**	st1b	\3\.d, \1, \[x8\]
**	ret
*/
v32qi
qi_rev_d (v32qi x)
{
  return __builtin_shuffle (x, x, (v32qi) { PERM4 (31) });
}

/*
** hi_rev_s:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.s, \1/z, \[x0\]
**	rev	(z[0-9]+)\.s, \2\.s
**	st1h	\3\.s, \1, \[x8\]
**	ret
*/
v64hi
hi_rev_s (v64hi x)
{
  return __builtin_shuffle (x, x, (v64hi) { PERM5 (63) });
}

/*
** hf_rev_s:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.s, \1/z, \[x0\]
**	rev	(z[0-9]+)\.s, \2\.s
**	st1h	\3\.s, \1, \[x8\]
**	ret
*/
v64hf
hf_rev_s (v64hf x)
{
  return __builtin_shuffle (x, x, (v64hi) { PERM5 (63) });
}

/*
** bf_rev_s:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.s, \1/z, \[x0\]
**	rev	(z[0-9]+)\.s, \2\.s
**	st1h	\3\.s, \1, \[x8\]
**	ret
*/
v64bf
bf_rev_s (v64bf x)
{
  return __builtin_shuffle (x, x, (v64hi) { PERM5 (63) });
}

/*
** hi_rev_d:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.d, \1/z, \[x0\]
**	rev	(z[0-9]+)\.d, \2\.d
**	st1h	\3\.d, \1, \[x8\]
**	ret
*/
v32hi
hi_rev_d (v32hi x)
{
  return __builtin_shuffle (x, x, (v32hi) { PERM4 (31) });
}

/*
** hf_rev_d:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.d, \1/z, \[x0\]
**	rev	(z[0-9]+)\.d, \2\.d
**	st1h	\3\.d, \1, \[x8\]
**	ret
*/
v32hf
hf_rev_d (v32hf x)
{
  return __builtin_shuffle (x, x, (v32hi) { PERM4 (31) });
}

/*
** bf_rev_d:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.d, \1/z, \[x0\]
**	rev	(z[0-9]+)\.d, \2\.d
**	st1h	\3\.d, \1, \[x8\]
**	ret
*/
v32bf
bf_rev_d (v32bf x)
{
  return __builtin_shuffle (x, x, (v32hi) { PERM4 (31) });
}

/*
** si_rev_d:
**	ptrue	(p[0-7])\.b, vl256
**	ld1w	(z[0-9]+)\.d, \1/z, \[x0\]
**	rev	(z[0-9]+)\.d, \2\.d
**	st1w	\3\.d, \1, \[x8\]
**	ret
*/
v32si
si_rev_d (v32si x)
{
  return __builtin_shuffle (x, x, (v32si) { PERM4 (31) });
}

/*
** sf_rev_d:
**	ptrue	(p[0-7])\.b, vl256
**	ld1w	(z[0-9]+)\.d, \1/z, \[x0\]
**	rev	(z[0-9]+)\.d, \2\.d
**	st1w	\3\.d, \1, \[x8\]
**	ret
*/
v32sf
sf_rev_d (v32sf x)
{
  return __builtin_shuffle (x, x, (v32si) { PERM4 (31) });
}