1 #include <arm_neon.h>
2 #include "arm-neon-ref.h"
3 #include "compute-ref-data.h"
4
5 /* We test vdup and vmov in the same place since they are aliases. */
6
7 /* Expected results. */
8 /* Chunk 0. */
9 VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
10 0xf0, 0xf0, 0xf0, 0xf0 };
11 VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
12 VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
13 VECT_VAR_DECL(expected0,int,64,1) [] = { 0xfffffffffffffff0 };
14 VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
15 0xf0, 0xf0, 0xf0, 0xf0 };
16 VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
17 VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
18 VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
19 VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
20 0xf0, 0xf0, 0xf0, 0xf0 };
21 VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
22 #if defined (FP16_SUPPORTED)
23 VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00,
24 0xcc00, 0xcc00 };
25 #endif
26 VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
27 VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
28 0xf0, 0xf0, 0xf0, 0xf0,
29 0xf0, 0xf0, 0xf0, 0xf0,
30 0xf0, 0xf0, 0xf0, 0xf0 };
31 VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
32 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
33 VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff0,
34 0xfffffff0, 0xfffffff0 };
35 VECT_VAR_DECL(expected0,int,64,2) [] = { 0xfffffffffffffff0,
36 0xfffffffffffffff0 };
37 VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
38 0xf0, 0xf0, 0xf0, 0xf0,
39 0xf0, 0xf0, 0xf0, 0xf0,
40 0xf0, 0xf0, 0xf0, 0xf0 };
41 VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
42 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
43 VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff0,
44 0xfffffff0, 0xfffffff0 };
45 VECT_VAR_DECL(expected0,uint,64,2) [] = { 0xfffffffffffffff0,
46 0xfffffffffffffff0 };
47 VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
48 0xf0, 0xf0, 0xf0, 0xf0,
49 0xf0, 0xf0, 0xf0, 0xf0,
50 0xf0, 0xf0, 0xf0, 0xf0 };
51 VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
52 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
53 #if defined (FP16_SUPPORTED)
54 VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcc00,
55 0xcc00, 0xcc00,
56 0xcc00, 0xcc00,
57 0xcc00, 0xcc00 };
58 #endif
59 VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
60 0xc1800000, 0xc1800000 };
61
62 /* Chunk 1. */
63 VECT_VAR_DECL(expected1,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
64 0xf1, 0xf1, 0xf1, 0xf1 };
65 VECT_VAR_DECL(expected1,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
66 VECT_VAR_DECL(expected1,int,32,2) [] = { 0xfffffff1, 0xfffffff1 };
67 VECT_VAR_DECL(expected1,int,64,1) [] = { 0xfffffffffffffff1 };
68 VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
69 0xf1, 0xf1, 0xf1, 0xf1 };
70 VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
71 VECT_VAR_DECL(expected1,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 };
72 VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
73 VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
74 0xf1, 0xf1, 0xf1, 0xf1 };
75 VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
76 #if defined (FP16_SUPPORTED)
77 VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80,
78 0xcb80, 0xcb80 };
79 #endif
80 VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
81 VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
82 0xf1, 0xf1, 0xf1, 0xf1,
83 0xf1, 0xf1, 0xf1, 0xf1,
84 0xf1, 0xf1, 0xf1, 0xf1 };
85 VECT_VAR_DECL(expected1,int,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
86 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
87 VECT_VAR_DECL(expected1,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
88 0xfffffff1, 0xfffffff1 };
89 VECT_VAR_DECL(expected1,int,64,2) [] = { 0xfffffffffffffff1,
90 0xfffffffffffffff1 };
91 VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
92 0xf1, 0xf1, 0xf1, 0xf1,
93 0xf1, 0xf1, 0xf1, 0xf1,
94 0xf1, 0xf1, 0xf1, 0xf1 };
95 VECT_VAR_DECL(expected1,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
96 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
97 VECT_VAR_DECL(expected1,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
98 0xfffffff1, 0xfffffff1 };
99 VECT_VAR_DECL(expected1,uint,64,2) [] = { 0xfffffffffffffff1,
100 0xfffffffffffffff1 };
101 VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
102 0xf1, 0xf1, 0xf1, 0xf1,
103 0xf1, 0xf1, 0xf1, 0xf1,
104 0xf1, 0xf1, 0xf1, 0xf1 };
105 VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
106 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
107 #if defined (FP16_SUPPORTED)
108 VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb80, 0xcb80,
109 0xcb80, 0xcb80,
110 0xcb80, 0xcb80,
111 0xcb80, 0xcb80 };
112 #endif
113 VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
114 0xc1700000, 0xc1700000 };
115
116 /* Chunk 2. */
117 VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
118 0xf2, 0xf2, 0xf2, 0xf2 };
119 VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
120 VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff2, 0xfffffff2 };
121 VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff2 };
122 VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
123 0xf2, 0xf2, 0xf2, 0xf2 };
124 VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
125 VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff2, 0xfffffff2 };
126 VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
127 VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
128 0xf2, 0xf2, 0xf2, 0xf2 };
129 VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
130 #if defined (FP16_SUPPORTED)
131 VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00,
132 0xcb00, 0xcb00 };
133 #endif
134 VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
135 VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
136 0xf2, 0xf2, 0xf2, 0xf2,
137 0xf2, 0xf2, 0xf2, 0xf2,
138 0xf2, 0xf2, 0xf2, 0xf2 };
139 VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
140 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
141 VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff2, 0xfffffff2,
142 0xfffffff2, 0xfffffff2 };
143 VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff2,
144 0xfffffffffffffff2 };
145 VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
146 0xf2, 0xf2, 0xf2, 0xf2,
147 0xf2, 0xf2, 0xf2, 0xf2,
148 0xf2, 0xf2, 0xf2, 0xf2 };
149 VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
150 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
151 VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff2, 0xfffffff2,
152 0xfffffff2, 0xfffffff2 };
153 VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff2,
154 0xfffffffffffffff2 };
155 VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
156 0xf2, 0xf2, 0xf2, 0xf2,
157 0xf2, 0xf2, 0xf2, 0xf2,
158 0xf2, 0xf2, 0xf2, 0xf2 };
159 VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
160 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
161 #if defined (FP16_SUPPORTED)
162 VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00,
163 0xcb00, 0xcb00,
164 0xcb00, 0xcb00,
165 0xcb00, 0xcb00 };
166 #endif
167 VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
168 0xc1600000, 0xc1600000 };
169
170 #define TEST_MSG "VDUP/VDUPQ"
exec_vdup_vmov(void)171 void exec_vdup_vmov (void)
172 {
173 int i;
174
175 /* Basic test: vec=vdup(x), then store the result. */
176 #undef TEST_VDUP
177 #define TEST_VDUP(Q, T1, T2, W, N) \
178 VECT_VAR(vector, T1, W, N) = \
179 vdup##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \
180 vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N))
181
182 /* Basic test: vec=vmov(x), then store the result. */
183 #define TEST_VMOV(Q, T1, T2, W, N) \
184 VECT_VAR(vector, T1, W, N) = \
185 vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \
186 vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N))
187
188 DECL_VARIABLE_ALL_VARIANTS(vector);
189
190 /* Try to read different places from the input buffer. */
191 for (i=0; i< 3; i++) {
192 clean_results ();
193
194 TEST_VDUP(, int, s, 8, 8);
195 TEST_VDUP(, int, s, 16, 4);
196 TEST_VDUP(, int, s, 32, 2);
197 TEST_VDUP(, int, s, 64, 1);
198 TEST_VDUP(, uint, u, 8, 8);
199 TEST_VDUP(, uint, u, 16, 4);
200 TEST_VDUP(, uint, u, 32, 2);
201 TEST_VDUP(, uint, u, 64, 1);
202 TEST_VDUP(, poly, p, 8, 8);
203 TEST_VDUP(, poly, p, 16, 4);
204 #if defined (FP16_SUPPORTED)
205 TEST_VDUP(, float, f, 16, 4);
206 #endif
207 TEST_VDUP(, float, f, 32, 2);
208
209 TEST_VDUP(q, int, s, 8, 16);
210 TEST_VDUP(q, int, s, 16, 8);
211 TEST_VDUP(q, int, s, 32, 4);
212 TEST_VDUP(q, int, s, 64, 2);
213 TEST_VDUP(q, uint, u, 8, 16);
214 TEST_VDUP(q, uint, u, 16, 8);
215 TEST_VDUP(q, uint, u, 32, 4);
216 TEST_VDUP(q, uint, u, 64, 2);
217 TEST_VDUP(q, poly, p, 8, 16);
218 TEST_VDUP(q, poly, p, 16, 8);
219 #if defined (FP16_SUPPORTED)
220 TEST_VDUP(q, float, f, 16, 8);
221 #endif
222 TEST_VDUP(q, float, f, 32, 4);
223
224 #if defined (FP16_SUPPORTED)
225 switch (i) {
226 case 0:
227 CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
228 break;
229 case 1:
230 CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
231 break;
232 case 2:
233 CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
234 break;
235 default:
236 abort();
237 }
238 #else
239 switch (i) {
240 case 0:
241 CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
242 break;
243 case 1:
244 CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected1, "");
245 break;
246 case 2:
247 CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected2, "");
248 break;
249 default:
250 abort();
251 }
252 #endif
253 }
254
255 /* Do the same tests with vmov. Use the same expected results. */
256 #undef TEST_MSG
257 #define TEST_MSG "VMOV/VMOVQ"
258 for (i=0; i< 3; i++) {
259 clean_results ();
260
261 TEST_VMOV(, int, s, 8, 8);
262 TEST_VMOV(, int, s, 16, 4);
263 TEST_VMOV(, int, s, 32, 2);
264 TEST_VMOV(, int, s, 64, 1);
265 TEST_VMOV(, uint, u, 8, 8);
266 TEST_VMOV(, uint, u, 16, 4);
267 TEST_VMOV(, uint, u, 32, 2);
268 TEST_VMOV(, uint, u, 64, 1);
269 TEST_VMOV(, poly, p, 8, 8);
270 TEST_VMOV(, poly, p, 16, 4);
271 #if defined (FP16_SUPPORTED)
272 TEST_VMOV(, float, f, 16, 4);
273 #endif
274 TEST_VMOV(, float, f, 32, 2);
275
276 TEST_VMOV(q, int, s, 8, 16);
277 TEST_VMOV(q, int, s, 16, 8);
278 TEST_VMOV(q, int, s, 32, 4);
279 TEST_VMOV(q, int, s, 64, 2);
280 TEST_VMOV(q, uint, u, 8, 16);
281 TEST_VMOV(q, uint, u, 16, 8);
282 TEST_VMOV(q, uint, u, 32, 4);
283 TEST_VMOV(q, uint, u, 64, 2);
284 TEST_VMOV(q, poly, p, 8, 16);
285 TEST_VMOV(q, poly, p, 16, 8);
286 #if defined (FP16_SUPPORTED)
287 TEST_VMOV(q, float, f, 16, 8);
288 #endif
289 TEST_VMOV(q, float, f, 32, 4);
290
291 #if defined (FP16_SUPPORTED)
292 switch (i) {
293 case 0:
294 CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
295 break;
296 case 1:
297 CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
298 break;
299 case 2:
300 CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
301 break;
302 default:
303 abort();
304 }
305 #else
306 switch (i) {
307 case 0:
308 CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
309 break;
310 case 1:
311 CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected1, "");
312 break;
313 case 2:
314 CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected2, "");
315 break;
316 default:
317 abort();
318 }
319 #endif
320
321 }
322 }
323
/* Test entry point: run the vdup/vmov checks and return success.  */
int
main (void)
{
  exec_vdup_vmov ();

  return 0;
}
329