/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O -msve-vector-bits=2048 -mlittle-endian --save-temps" } */
/* { dg-final { check-function-bodies "**" "" } } */

/* Fixed-length vector types.  With -msve-vector-bits=2048 the SVE
   registers are 256 bytes, so every type below is smaller than a full
   register and must be handled with unpacked (widened-container)
   element accesses.  */
typedef unsigned char v128qi __attribute__((vector_size(128)));
typedef unsigned char v64qi __attribute__((vector_size(64)));
typedef unsigned char v32qi __attribute__((vector_size(32)));
typedef unsigned short v64hi __attribute__((vector_size(128)));
typedef unsigned short v32hi __attribute__((vector_size(64)));
typedef _Float16 v64hf __attribute__((vector_size(128)));
typedef _Float16 v32hf __attribute__((vector_size(64)));
typedef __bf16 v64bf __attribute__((vector_size(128)));
typedef __bf16 v32bf __attribute__((vector_size(64)));
typedef unsigned int v32si __attribute__((vector_size(128)));
typedef float v32sf __attribute__((vector_size(128)));

/* PERMn (B) expands to the 2^(n+1) descending indices
   B, B - 1, ..., B - 2^(n+1) + 1, so PERMn (2^(n+1) - 1) is a
   full-reversal permutation mask for a 2^(n+1)-element vector.  */
#define PERM0(B) B, B - 1
#define PERM1(B) PERM0 (B), PERM0 (B - 2)
#define PERM2(B) PERM1 (B), PERM1 (B - 4)
#define PERM3(B) PERM2 (B), PERM2 (B - 8)
#define PERM4(B) PERM3 (B), PERM3 (B - 16)
#define PERM5(B) PERM4 (B), PERM4 (B - 32)
#define PERM6(B) PERM5 (B), PERM5 (B - 64)
24
/*
** qi_rev_h:
**	ptrue	(p[0-7])\.b, vl256
**	ld1b	(z[0-9]+)\.h, \1/z, \[x0\]
**	rev	(z[0-9]+)\.h, \2\.h
**	st1b	\3\.h, \1, \[x8\]
**	ret
*/
v128qi
qi_rev_h (v128qi x)
{
  /* Reverse all 128 bytes (mask is 127, 126, ..., 0); the expected code
     above works on bytes held in .h containers.  */
  return __builtin_shuffle (x, x, (v128qi) { PERM6 (127) });
}
38
/*
** qi_rev_s:
**	ptrue	(p[0-7])\.b, vl256
**	ld1b	(z[0-9]+)\.s, \1/z, \[x0\]
**	rev	(z[0-9]+)\.s, \2\.s
**	st1b	\3\.s, \1, \[x8\]
**	ret
*/
v64qi
qi_rev_s (v64qi x)
{
  /* Reverse all 64 bytes (mask is 63, 62, ..., 0); the expected code
     above works on bytes held in .s containers.  */
  return __builtin_shuffle (x, x, (v64qi) { PERM5 (63) });
}
52
/*
** qi_rev_d:
**	ptrue	(p[0-7])\.b, vl256
**	ld1b	(z[0-9]+)\.d, \1/z, \[x0\]
**	rev	(z[0-9]+)\.d, \2\.d
**	st1b	\3\.d, \1, \[x8\]
**	ret
*/
v32qi
qi_rev_d (v32qi x)
{
  /* Reverse all 32 bytes (mask is 31, 30, ..., 0); the expected code
     above works on bytes held in .d containers.  */
  return __builtin_shuffle (x, x, (v32qi) { PERM4 (31) });
}
66
/*
** hi_rev_s:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.s, \1/z, \[x0\]
**	rev	(z[0-9]+)\.s, \2\.s
**	st1h	\3\.s, \1, \[x8\]
**	ret
*/
v64hi
hi_rev_s (v64hi x)
{
  /* Reverse all 64 halfwords (mask is 63, 62, ..., 0); the expected
     code above works on halfwords held in .s containers.  */
  return __builtin_shuffle (x, x, (v64hi) { PERM5 (63) });
}
80
/*
** hf_rev_s:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.s, \1/z, \[x0\]
**	rev	(z[0-9]+)\.s, \2\.s
**	st1h	\3\.s, \1, \[x8\]
**	ret
*/
v64hf
hf_rev_s (v64hf x)
{
  /* As hi_rev_s but for _Float16; __builtin_shuffle requires an
     integer mask vector, hence the v64hi selector.  */
  return __builtin_shuffle (x, x, (v64hi) { PERM5 (63) });
}
94
/*
** bf_rev_s:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.s, \1/z, \[x0\]
**	rev	(z[0-9]+)\.s, \2\.s
**	st1h	\3\.s, \1, \[x8\]
**	ret
*/
v64bf
bf_rev_s (v64bf x)
{
  /* As hi_rev_s but for __bf16; __builtin_shuffle requires an
     integer mask vector, hence the v64hi selector.  */
  return __builtin_shuffle (x, x, (v64hi) { PERM5 (63) });
}
108
/*
** hi_rev_d:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.d, \1/z, \[x0\]
**	rev	(z[0-9]+)\.d, \2\.d
**	st1h	\3\.d, \1, \[x8\]
**	ret
*/
v32hi
hi_rev_d (v32hi x)
{
  /* Reverse all 32 halfwords (mask is 31, 30, ..., 0); the expected
     code above works on halfwords held in .d containers.  */
  return __builtin_shuffle (x, x, (v32hi) { PERM4 (31) });
}
122
/*
** hf_rev_d:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.d, \1/z, \[x0\]
**	rev	(z[0-9]+)\.d, \2\.d
**	st1h	\3\.d, \1, \[x8\]
**	ret
*/
v32hf
hf_rev_d (v32hf x)
{
  /* As hi_rev_d but for _Float16; __builtin_shuffle requires an
     integer mask vector, hence the v32hi selector.  */
  return __builtin_shuffle (x, x, (v32hi) { PERM4 (31) });
}
136
/*
** bf_rev_d:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.d, \1/z, \[x0\]
**	rev	(z[0-9]+)\.d, \2\.d
**	st1h	\3\.d, \1, \[x8\]
**	ret
*/
v32bf
bf_rev_d (v32bf x)
{
  /* As hi_rev_d but for __bf16; __builtin_shuffle requires an
     integer mask vector, hence the v32hi selector.  */
  return __builtin_shuffle (x, x, (v32hi) { PERM4 (31) });
}
150
/*
** si_rev_d:
**	ptrue	(p[0-7])\.b, vl256
**	ld1w	(z[0-9]+)\.d, \1/z, \[x0\]
**	rev	(z[0-9]+)\.d, \2\.d
**	st1w	\3\.d, \1, \[x8\]
**	ret
*/
v32si
si_rev_d (v32si x)
{
  /* Reverse all 32 words (mask is 31, 30, ..., 0); the expected code
     above works on words held in .d containers.  */
  return __builtin_shuffle (x, x, (v32si) { PERM4 (31) });
}
164
/*
** sf_rev_d:
**	ptrue	(p[0-7])\.b, vl256
**	ld1w	(z[0-9]+)\.d, \1/z, \[x0\]
**	rev	(z[0-9]+)\.d, \2\.d
**	st1w	\3\.d, \1, \[x8\]
**	ret
*/
v32sf
sf_rev_d (v32sf x)
{
  /* As si_rev_d but for float; __builtin_shuffle requires an
     integer mask vector, hence the v32si selector.  */
  return __builtin_shuffle (x, x, (v32si) { PERM4 (31) });
}
178