1 /* This file defines helper operations shared by all the tests.  */
2 
3 #ifndef _ARM_NEON_REF_H_
4 #define _ARM_NEON_REF_H_
5 
6 #include <stdio.h>
7 #include <inttypes.h>
8 
/* Integer stand-ins for floating-point element types: they let
   floating-point results be written down in integer form, using an
   unsigned integer of the width given in the type name.  */
typedef uint64_t hfloat64_t;
typedef uint32_t hfloat32_t;
typedef uint16_t hfloat16_t;

/* 16-bit integer stand-in (presumably for bfloat16 values, going by
   the name).  */
typedef uint16_t hbfloat16_t;
15 
/* Prototypes of the few C library routines used by the helpers below,
   declared by hand instead of including <stdlib.h> and <string.h>.  */
extern void abort (void);
extern void *memset (void *dst, int byte, size_t len);
extern void *memcpy (void *dst, const void *src, size_t len);
extern size_t strlen (const char *str);
20 
/* Helper macro to select FP16 tests: FP16_SUPPORTED is defined as (1)
   when the compiler announces either half-precision format, and is
   left undefined otherwise.  */
#if !defined (__ARM_FP16_FORMAT_IEEE) \
    && !defined (__ARM_FP16_FORMAT_ALTERNATIVE)
#undef FP16_SUPPORTED
#else
#define FP16_SUPPORTED (1)
#endif
28 
29 /* Various string construction helpers.  */
30 
/*
   The most useful at user-level are VECT_VAR and VECT_VAR_DECL, which
   construct variable names or declarations, such as:
   VECT_VAR(expected, int, 16, 4) -> expected_int16x4
   VECT_VAR_DECL(expected, int, 16, 4) -> int16_t expected_int16x4
*/
/* Some instructions don't exist on ARM.  AARCH64_ONLY(CODE) expands
   to CODE when __aarch64__ is defined and to nothing otherwise, so
   it can be used to guard against them.  */
#ifndef __aarch64__
#define AARCH64_ONLY(CODE)
#else
#define AARCH64_ONLY(CODE) CODE
#endif
44 
/* Two-level stringification: STR expands its argument before xSTR
   turns it into a string literal.  */
#define STR(ARG) xSTR(ARG)
#define xSTR(ARG) #ARG

/* Two-level token pasting: xNAME expands both arguments, then xNAME1
   joins them with an underscore.  */
#define xNAME(HEAD,TAIL) xNAME1(HEAD,TAIL)
#define xNAME1(HEAD,TAIL) HEAD ## _ ##  TAIL

/* Scalars.  */
/* VAR(foo,int,16) -> foo_int16 */
#define VAR(NAME,TYPE,WIDTH) xNAME(NAME,TYPE##WIDTH)
/* VAR_DECL(foo,int,16) -> int16_t foo_int16 */
#define VAR_DECL(NAME, TYPE, WIDTH) TYPE##WIDTH##_t VAR(NAME,TYPE,WIDTH)

/* Vectors.  */
/* VECT_NAME(int,16,4) ->  int16x4 */
#define VECT_NAME(TYPE, WIDTH, LANES) TYPE##WIDTH##x##LANES
/* VECT_TYPE(int,16,4) -> int16x4_t */
#define VECT_TYPE(TYPE, WIDTH, LANES) xNAME(VECT_NAME(TYPE,WIDTH,LANES),t)
/* VECT_VAR(foo,int,16,4) -> foo_int16x4 */
#define VECT_VAR(NAME,TYPE,WIDTH,LANES) \
  xNAME(NAME,VECT_NAME(TYPE,WIDTH,LANES))
/* VECT_VAR_DECL(foo,int,16,4) -> int16_t foo_int16x4 */
#define VECT_VAR_DECL(NAME, TYPE, WIDTH, LANES) \
  TYPE##WIDTH##_t VECT_VAR(NAME,TYPE,WIDTH,LANES)

/* Array declarations.  */
/* ARRAY(foo,int,16,4) -> int16_t foo_int16x4[4] */
#define ARRAY(NAME, TYPE, WIDTH, LANES) \
  VECT_VAR_DECL(NAME,TYPE,WIDTH,LANES)[LANES]

/* Arrays of vectors.  */
/* VECT_ARRAY_NAME(int,16,4,2) -> int16x4x2 */
#define VECT_ARRAY_NAME(TYPE, WIDTH, LANES, COUNT) \
  TYPE##WIDTH##x##LANES##x##COUNT
/* VECT_ARRAY_TYPE(int,16,4,2) -> int16x4x2_t */
#define VECT_ARRAY_TYPE(TYPE, WIDTH, LANES, COUNT) \
  xNAME(VECT_ARRAY_NAME(TYPE,WIDTH,LANES,COUNT),t)
/* VECT_ARRAY_VAR(foo,int,16,4,2) -> foo_int16x4x2 */
#define VECT_ARRAY_VAR(NAME,TYPE,WIDTH,LANES,COUNT) \
  xNAME(NAME,VECT_ARRAY_NAME(TYPE,WIDTH,LANES,COUNT))
/* VECT_ARRAY(foo,int,16,4,2) -> int16_t foo_int16x4x2[4*2] */
#define VECT_ARRAY(NAME, TYPE, WIDTH, LANES, COUNT) \
  TYPE##WIDTH##_t VECT_ARRAY_VAR(NAME,TYPE,WIDTH,LANES,COUNT)[LANES*COUNT]
79 
/* Check results vs expected values.  Operates on one vector: the N
   lanes of result_<T><W>x<N> are compared with those of
   <EXPECTED>_<T><W>x<N>.  On the first mismatch a diagnostic (MSG,
   source location, buffer name, vector type, lane index, both values
   printed with FMT, and COMMENT) is written to stderr and abort () is
   called.  When every lane matches, a "CHECKED" line is written to
   stderr.  */
#define CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)				\
  {									\
    int i = 0;								\
    while (i < N)							\
      {									\
	if (VECT_VAR(result, T, W, N)[i]				\
	    != VECT_VAR(EXPECTED, T, W, N)[i])				\
	  {								\
	    fprintf(stderr,						\
		    "ERROR in %s (%s line %d in buffer '%s') at type %s " \
		    "index %d: got 0x%" FMT " != 0x%" FMT " %s\n",	\
		    MSG, __FILE__, __LINE__,				\
		    STR(EXPECTED),					\
		    STR(VECT_NAME(T, W, N)),				\
		    i,							\
		    VECT_VAR(result, T, W, N)[i],			\
		    VECT_VAR(EXPECTED, T, W, N)[i],			\
		    strlen(COMMENT) > 0 ? COMMENT : "");		\
	    abort();							\
	  }								\
	i++;								\
      }									\
    fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG);	\
  }
103 
/* Floating-point variant of CHECK.  Lanes are compared as raw bit
   patterns: each lane of result_<T><W>x<N> (of type <T><W>_t) is
   stored into a union and read back as uint<W>_t, then compared with
   the integer taken from <EXPECTED>_h<T><W>x<N>.  Reporting is the
   same as for CHECK, with both values printed in their integer
   form.  */
#define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT)			\
  {									\
    union fp_operand {							\
      uint##W##_t i;							\
      T##W##_t f;							\
    } tmp_res, tmp_exp;							\
    int i;								\
    for (i = 0; i < N; i++)						\
      {									\
	tmp_exp.i = VECT_VAR(EXPECTED, h##T, W, N)[i];			\
	tmp_res.f = VECT_VAR(result, T, W, N)[i];			\
	if (tmp_res.i == tmp_exp.i)					\
	  continue;							\
	fprintf(stderr,							\
		"ERROR in %s (%s line %d in buffer '%s') at type %s "	\
		"index %d: got 0x%" FMT " != 0x%" FMT " %s\n",		\
		MSG, __FILE__, __LINE__,				\
		STR(EXPECTED),						\
		STR(VECT_NAME(T, W, N)),				\
		i,							\
		tmp_res.i,						\
		tmp_exp.i,						\
		strlen(COMMENT) > 0 ? COMMENT : "");			\
	abort();							\
      }									\
    fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG);	\
  }
132 
/* Polynomial variant of CHECK.  Each lane of result_<T><W>x<N> is
   stored into a union as poly<W>_t and read back as uint<W>_t, then
   compared with the lane of <EXPECTED>_<T><W>x<N> taken as an
   integer.  The union member type is always poly<W>_t, whatever T
   is.  Reporting is the same as for CHECK.  */
#define CHECK_POLY(MSG,T,W,N,FMT,EXPECTED,COMMENT)			\
  {									\
    union poly_operand {						\
      uint##W##_t i;							\
      poly##W##_t p;							\
    } tmp_res, tmp_exp;							\
    int i;								\
    for (i = 0; i < N; i++)						\
      {									\
	tmp_exp.i = VECT_VAR(EXPECTED, T, W, N)[i];			\
	tmp_res.p = VECT_VAR(result, T, W, N)[i];			\
	if (tmp_res.i == tmp_exp.i)					\
	  continue;							\
	fprintf(stderr,							\
		"ERROR in %s (%s line %d in buffer '%s') at type %s "	\
		"index %d: got 0x%" FMT " != 0x%" FMT " %s\n",		\
		MSG, __FILE__, __LINE__,				\
		STR(EXPECTED),						\
		STR(VECT_NAME(T, W, N)),				\
		i,							\
		tmp_res.i,						\
		tmp_exp.i,						\
		strlen(COMMENT) > 0 ? COMMENT : "");			\
	abort();							\
      }									\
    fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG);	\
  }
161 
/* Byte value used to fill buffers before a test runs.  A non-zero
   pattern helps to diagnose buffer overflows.  */
#define CLEAN_PATTERN_8  0x33

/* Fill the whole <VAR>_<T><W>x<N> array with CLEAN_PATTERN_8.  The
   expansion already ends with a semicolon.  */
#define CLEAN(VAR,T,W,N)						\
  memset(VECT_VAR(VAR, T, W, N), CLEAN_PATTERN_8,			\
	 sizeof(VECT_VAR(VAR, T, W, N)));
170 
171 /* Define output buffers, one of each size.  */
172 static ARRAY(result, int, 8, 8);
173 static ARRAY(result, int, 16, 4);
174 static ARRAY(result, int, 32, 2);
175 static ARRAY(result, int, 64, 1);
176 static ARRAY(result, uint, 8, 8);
177 static ARRAY(result, uint, 16, 4);
178 static ARRAY(result, uint, 32, 2);
179 static ARRAY(result, uint, 64, 1);
180 static ARRAY(result, poly, 8, 8);
181 static ARRAY(result, poly, 16, 4);
182 #if defined (__ARM_FEATURE_CRYPTO)
183 static ARRAY(result, poly, 64, 1);
184 #endif
185 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
186 static ARRAY(result, float, 16, 4);
187 #endif
188 static ARRAY(result, float, 32, 2);
189 #ifdef __aarch64__
190 static ARRAY(result, float, 64, 1);
191 #endif
192 static ARRAY(result, int, 8, 16);
193 static ARRAY(result, int, 16, 8);
194 static ARRAY(result, int, 32, 4);
195 static ARRAY(result, int, 64, 2);
196 static ARRAY(result, uint, 8, 16);
197 static ARRAY(result, uint, 16, 8);
198 static ARRAY(result, uint, 32, 4);
199 static ARRAY(result, uint, 64, 2);
200 static ARRAY(result, poly, 8, 16);
201 static ARRAY(result, poly, 16, 8);
202 #if defined (__ARM_FEATURE_CRYPTO)
203 static ARRAY(result, poly, 64, 2);
204 #endif
205 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
206 static ARRAY(result, float, 16, 8);
207 #endif
208 static ARRAY(result, float, 32, 4);
209 #ifdef __aarch64__
210 static ARRAY(result, float, 64, 2);
211 #endif
212 
213 /* Declare expected results, one of each size. They are defined and
214    initialized in each test file.  */
215 extern ARRAY(expected, int, 8, 8);
216 extern ARRAY(expected, int, 16, 4);
217 extern ARRAY(expected, int, 32, 2);
218 extern ARRAY(expected, int, 64, 1);
219 extern ARRAY(expected, uint, 8, 8);
220 extern ARRAY(expected, uint, 16, 4);
221 extern ARRAY(expected, uint, 32, 2);
222 extern ARRAY(expected, uint, 64, 1);
223 extern ARRAY(expected, poly, 8, 8);
224 extern ARRAY(expected, poly, 16, 4);
225 extern ARRAY(expected, hfloat, 16, 4);
226 extern ARRAY(expected, hfloat, 32, 2);
227 extern ARRAY(expected, hfloat, 64, 1);
228 extern ARRAY(expected, int, 8, 16);
229 extern ARRAY(expected, int, 16, 8);
230 extern ARRAY(expected, int, 32, 4);
231 extern ARRAY(expected, int, 64, 2);
232 extern ARRAY(expected, uint, 8, 16);
233 extern ARRAY(expected, uint, 16, 8);
234 extern ARRAY(expected, uint, 32, 4);
235 extern ARRAY(expected, uint, 64, 2);
236 extern ARRAY(expected, poly, 8, 16);
237 extern ARRAY(expected, poly, 16, 8);
238 extern ARRAY(expected, hfloat, 16, 8);
239 extern ARRAY(expected, hfloat, 32, 4);
240 extern ARRAY(expected, hfloat, 64, 2);
241 
/* Check every result buffer except the FP16 ones against the matching
   <EXPECTED>_<type> array, 64-bit vectors first and 128-bit vectors
   second.  Integer lanes go through CHECK, polynomial lanes through
   CHECK_POLY and float32 lanes through CHECK_FP.  The poly64 and
   float64 buffers are not checked here.

   test_name and comment are forwarded to the CHECK macros as their
   MSG and COMMENT arguments.

   The closing brace is deliberately not followed by a backslash, so
   the macro definition ends there instead of being spliced with the
   next source line.  */
#define CHECK_RESULTS_NAMED_NO_FP16(test_name,EXPECTED,comment)		\
  {									\
    CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment);		\
    CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment);		\
    CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment);		\
    CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment);		\
    CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment);		\
    CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment);		\
    CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment);		\
    CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment);		\
    CHECK_POLY(test_name, poly, 8, 8, PRIx8, EXPECTED, comment);	\
    CHECK_POLY(test_name, poly, 16, 4, PRIx16, EXPECTED, comment);	\
    CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment);	\
									\
    CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment);		\
    CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment);		\
    CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment);		\
    CHECK(test_name, int, 64, 2, PRIx64, EXPECTED, comment);		\
    CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment);		\
    CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment);		\
    CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment);		\
    CHECK(test_name, uint, 64, 2, PRIx64, EXPECTED, comment);		\
    CHECK_POLY(test_name, poly, 8, 16, PRIx8, EXPECTED, comment);	\
    CHECK_POLY(test_name, poly, 16, 8, PRIx16, EXPECTED, comment);	\
    CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment);	\
  }
268 
/* Check results against EXPECTED.  Operates on all possible vector
   types: everything CHECK_RESULTS_NAMED_NO_FP16 covers plus, when
   FP16 is supported, the float16x4 and float16x8 buffers.  */
#ifndef FP16_SUPPORTED
#define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment)		\
  CHECK_RESULTS_NAMED_NO_FP16(test_name, EXPECTED, comment)
#else
#define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment)			\
  {									\
    CHECK_RESULTS_NAMED_NO_FP16(test_name, EXPECTED, comment)		\
    CHECK_FP(test_name, float, 16, 4, PRIx16, EXPECTED, comment);	\
    CHECK_FP(test_name, float, 16, 8, PRIx16, EXPECTED, comment);	\
  }
#endif
281 
/* Shorthands that compare against the default buffers, i.e. the
   arrays whose names start with "expected".  */
#define CHECK_RESULTS(name,note)			\
  CHECK_RESULTS_NAMED(name, expected, note)

#define CHECK_RESULTS_NO_FP16(name,note)			\
  CHECK_RESULTS_NAMED_NO_FP16(name, expected, note)
287 
288 
/* View of a 32-bit floating-point status/control word, either as the
   raw value (word) or as individual fields (b).  The fields are
   listed in opposite order for little-endian and big-endian targets;
   with the compiler's usual bit-field allocation this is meant to
   keep FZ, DN, AHP and QC at bits 24 to 27 of word, followed by V, C,
   Z and N, on both.  NOTE(review): bit-field allocation order is
   implementation-defined -- confirm for any new compiler.  */
typedef union {
  struct {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    int _xxx:24;
    unsigned int FZ:1;
    unsigned int DN:1;
    unsigned int AHP:1;
    unsigned int QC:1;
    int V:1;
    int C:1;
    int Z:1;
    int N:1;
#else /* __ORDER_BIG_ENDIAN__ */
    int N:1;
    int Z:1;
    int C:1;
    int V:1;
    unsigned int QC:1;
    unsigned int AHP:1;
    unsigned int DN:1;
    unsigned int FZ:1;
    int _dnm:24;
#endif /* __ORDER_BIG_ENDIAN__ */
  } b;
  unsigned int word;
} _ARM_FPSCR;
324 
/* Reads as the current value of the QC (cumulative saturation)
   flag.  */
#define Neon_Cumulative_Sat  __read_neon_cumulative_sat()

/* Preset the QC flag to VALUE.  DEP is a fake dependency: it is
   needed to ensure correct ordering between the asm statements that
   preset the QC flag value and the Neon operators writing to QC.  */
#define Set_Neon_Cumulative_Sat(value, dep)	\
  __set_neon_cumulative_sat((value), (dep))
331 
332 #if defined(__aarch64__)
__read_neon_cumulative_sat(void)333 static volatile int __read_neon_cumulative_sat (void) {
334     _ARM_FPSCR _afpscr_for_qc;
335     asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));
336     return _afpscr_for_qc.b.QC;
337 }
338 #define __set_neon_cumulative_sat(x, depend) {				\
339     _ARM_FPSCR _afpscr_for_qc;						\
340     asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));		\
341     _afpscr_for_qc.b.QC = x;						\
342     asm volatile ("msr fpsr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
343   }
344 #else
__read_neon_cumulative_sat(void)345 static volatile int __read_neon_cumulative_sat (void) {
346     _ARM_FPSCR _afpscr_for_qc;
347     asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));
348     return _afpscr_for_qc.b.QC;
349 }
350 
351 #define __set_neon_cumulative_sat(x, depend) {				\
352     _ARM_FPSCR _afpscr_for_qc;						\
353     asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));		\
354     _afpscr_for_qc.b.QC = x;						\
355     asm volatile ("vmsr fpscr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
356   }
357 #endif
358 
359 /* Clean output buffers before execution.  */
clean_results(void)360 static void clean_results (void)
361 {
362   CLEAN(result, int, 8, 8);
363   CLEAN(result, int, 16, 4);
364   CLEAN(result, int, 32, 2);
365   CLEAN(result, int, 64, 1);
366   CLEAN(result, uint, 8, 8);
367   CLEAN(result, uint, 16, 4);
368   CLEAN(result, uint, 32, 2);
369   CLEAN(result, uint, 64, 1);
370   CLEAN(result, poly, 8, 8);
371   CLEAN(result, poly, 16, 4);
372 #if defined (__ARM_FEATURE_CRYPTO)
373   CLEAN(result, poly, 64, 1);
374 #endif
375 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
376   CLEAN(result, float, 16, 4);
377 #endif
378   CLEAN(result, float, 32, 2);
379 
380   CLEAN(result, int, 8, 16);
381   CLEAN(result, int, 16, 8);
382   CLEAN(result, int, 32, 4);
383   CLEAN(result, int, 64, 2);
384   CLEAN(result, uint, 8, 16);
385   CLEAN(result, uint, 16, 8);
386   CLEAN(result, uint, 32, 4);
387   CLEAN(result, uint, 64, 2);
388   CLEAN(result, poly, 8, 16);
389   CLEAN(result, poly, 16, 8);
390 #if defined (__ARM_FEATURE_CRYPTO)
391   CLEAN(result, poly, 64, 2);
392 #endif
393 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
394   CLEAN(result, float, 16, 8);
395 #endif
396   CLEAN(result, float, 32, 4);
397 
398   AARCH64_ONLY(CLEAN(result, float, 64, 2));
399 
400 #if defined(__aarch64__)
401   /* On AArch64, make sure to return DefaultNaN to have the same
402      results as on AArch32.  */
403   _ARM_FPSCR _afpscr;
404   asm volatile ("mrs %0,fpcr" : "=r" (_afpscr));
405   _afpscr.b.DN = 1;
406 
407   /* On AArch64, make sure to flush to zero by default, as on
408      AArch32. */
409   _afpscr.b.FZ = 1;
410 
411   asm volatile ("msr fpcr,%0" : : "r" (_afpscr));
412 #endif
413 }
414 
415 
/* Helpers to declare variables of various types.  */

/* DECL_VARIABLE(foo, int, 16, 4) -> int16x4_t foo_int16x4 */
#define DECL_VARIABLE(NAME, TYPE, WIDTH, LANES)		\
  VECT_TYPE(TYPE, WIDTH, LANES) VECT_VAR(NAME, TYPE, WIDTH, LANES)

/* Same as DECL_VARIABLE when __ARM_FEATURE_CRYPTO is defined; expands
   to nothing otherwise.  */
#ifndef __ARM_FEATURE_CRYPTO
#define DECL_VARIABLE_CRYPTO(NAME, TYPE, WIDTH, LANES)
#else
#define DECL_VARIABLE_CRYPTO(NAME, TYPE, WIDTH, LANES) \
  DECL_VARIABLE(NAME, TYPE, WIDTH, LANES)
#endif
426 
/* Declarations limited to one vector size and one signedness.  None
   of the expansions ends with a semicolon; the caller supplies it.  */

/* Signed variants: 64 bits, then 128 bits.  */
#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(NAME)	\
  DECL_VARIABLE(NAME, int, 8, 8);			\
  DECL_VARIABLE(NAME, int, 16, 4);			\
  DECL_VARIABLE(NAME, int, 32, 2);			\
  DECL_VARIABLE(NAME, int, 64, 1)

#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(NAME)	\
  DECL_VARIABLE(NAME, int, 8, 16);			\
  DECL_VARIABLE(NAME, int, 16, 8);			\
  DECL_VARIABLE(NAME, int, 32, 4);			\
  DECL_VARIABLE(NAME, int, 64, 2)

/* Unsigned variants: 64 bits, then 128 bits.  */
#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(NAME)	\
  DECL_VARIABLE(NAME, uint, 8, 8);			\
  DECL_VARIABLE(NAME, uint, 16, 4);			\
  DECL_VARIABLE(NAME, uint, 32, 2);			\
  DECL_VARIABLE(NAME, uint, 64, 1)

#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(NAME)	\
  DECL_VARIABLE(NAME, uint, 8, 16);			\
  DECL_VARIABLE(NAME, uint, 16, 8);			\
  DECL_VARIABLE(NAME, uint, 32, 4);			\
  DECL_VARIABLE(NAME, uint, 64, 2)
454 
/* Declare all 64 bits variants (DECL_VARIABLE_64BITS_VARIANTS) or all
   128 bits variants (DECL_VARIABLE_128BITS_VARIANTS) of VAR: signed,
   unsigned, poly8, poly16, poly64 (through DECL_VARIABLE_CRYPTO) and
   float32.  The float16 variants are added only when FP16 is
   supported, and the 128 bits macro adds float64x2 through
   AARCH64_ONLY.  */
#ifdef FP16_SUPPORTED

#define DECL_VARIABLE_64BITS_VARIANTS(VAR)	\
  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);	\
  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);	\
  DECL_VARIABLE(VAR, poly, 8, 8);		\
  DECL_VARIABLE(VAR, poly, 16, 4);		\
  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1);	\
  DECL_VARIABLE(VAR, float, 16, 4);		\
  DECL_VARIABLE(VAR, float, 32, 2)

#define DECL_VARIABLE_128BITS_VARIANTS(VAR)	\
  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR);	\
  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);	\
  DECL_VARIABLE(VAR, poly, 8, 16);		\
  DECL_VARIABLE(VAR, poly, 16, 8);		\
  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2);	\
  DECL_VARIABLE(VAR, float, 16, 8);		\
  DECL_VARIABLE(VAR, float, 32, 4);		\
  AARCH64_ONLY(DECL_VARIABLE(VAR, float, 64, 2))

#else /* !FP16_SUPPORTED */

#define DECL_VARIABLE_64BITS_VARIANTS(VAR)	\
  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);	\
  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);	\
  DECL_VARIABLE(VAR, poly, 8, 8);		\
  DECL_VARIABLE(VAR, poly, 16, 4);		\
  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1);	\
  DECL_VARIABLE(VAR, float, 32, 2)

#define DECL_VARIABLE_128BITS_VARIANTS(VAR)	\
  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR);	\
  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);	\
  DECL_VARIABLE(VAR, poly, 8, 16);		\
  DECL_VARIABLE(VAR, poly, 16, 8);		\
  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2);	\
  DECL_VARIABLE(VAR, float, 32, 4);		\
  AARCH64_ONLY(DECL_VARIABLE(VAR, float, 64, 2))

#endif /* FP16_SUPPORTED */
/* Combinations of the 64 bits and 128 bits declaration macros.  */

/* Declare all signed variants.  */
#define DECL_VARIABLE_SIGNED_VARIANTS(NAME)	\
  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(NAME);	\
  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(NAME)

/* Declare all unsigned variants.  */
#define DECL_VARIABLE_UNSIGNED_VARIANTS(NAME)	\
  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(NAME);	\
  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(NAME)

/* Declare all variants.  */
#define DECL_VARIABLE_ALL_VARIANTS(NAME)	\
  DECL_VARIABLE_64BITS_VARIANTS(NAME);		\
  DECL_VARIABLE_128BITS_VARIANTS(NAME)
510 
/* Helpers to initialize vectors.  */

/* VDUP(v, q, int, s, 16, 8, x) -> v_int16x8 = vdupq_n_s16(x) */
#define VDUP(DST, Q, T1, T2, W, N, VALUE)		\
  VECT_VAR(DST, T1, W, N) = vdup##Q##_n_##T2##W(VALUE)

/* VSET_LANE(v, q, int, s, 16, 8, l, x)
   -> v_int16x8 = vsetq_lane_s16(x, v_int16x8, l) */
#define VSET_LANE(DST, Q, T1, T2, W, N, LANE, VALUE)			\
  VECT_VAR(DST, T1, W, N)						\
    = vset##Q##_lane_##T2##W(VALUE, VECT_VAR(DST, T1, W, N), LANE)

/* We need to load initial values first, so rely on VLD1.
   VLOAD(v, buf, q, int, s, 16, 8) -> v_int16x8 = vld1q_s16(buf_int16x8) */
#define VLOAD(DST, SRC, Q, T1, T2, W, N)				\
  VECT_VAR(DST, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(SRC, T1, W, N))
523 
524 /* Helpers to call macros with 1 constant and 5 variable
525    arguments.  */
/* MACRO_CRYPTO(MACRO, VAR1, VAR2, Q, T1, T2, W, N) calls
   MACRO(VAR1, VAR2, Q, T1, T2, W, N) when __ARM_FEATURE_CRYPTO is
   defined and expands to nothing otherwise.  */
#ifndef __ARM_FEATURE_CRYPTO
#define MACRO_CRYPTO(MACRO, VAR1, VAR2, Q, T1, T2, W, N)
#else
#define MACRO_CRYPTO(MACRO, VAR1, VAR2, Q, T1, T2, W, N) \
  MACRO(VAR1, VAR2, Q, T1, T2, W, N)
#endif
532 
/* Each macro below invokes MACRO(ARG, Q, T1, T2, W, N) once per
   vector type of the named group, where Q is empty for 64 bits
   vectors and q for 128 bits ones, T1/T2 are int/s or uint/u, and W
   and N are the element width and lane count.  The expansions do not
   end with a semicolon.  */

/* Signed groups.  */
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, ARG)	\
  MACRO(ARG, , int, s, 8, 8);					\
  MACRO(ARG, , int, s, 16, 4);					\
  MACRO(ARG, , int, s, 32, 2);					\
  MACRO(ARG, , int, s, 64, 1)

#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, ARG)	\
  MACRO(ARG, q, int, s, 8, 16);					\
  MACRO(ARG, q, int, s, 16, 8);					\
  MACRO(ARG, q, int, s, 32, 4);					\
  MACRO(ARG, q, int, s, 64, 2)

#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, ARG)	\
  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, ARG);	\
  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, ARG)

/* Unsigned groups.  */
#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, ARG)	\
  MACRO(ARG, , uint, u, 8, 8);					\
  MACRO(ARG, , uint, u, 16, 4);					\
  MACRO(ARG, , uint, u, 32, 2);					\
  MACRO(ARG, , uint, u, 64, 1)

#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,ARG)	\
  MACRO(ARG, q, uint, u, 8, 16);				\
  MACRO(ARG, q, uint, u, 16, 8);				\
  MACRO(ARG, q, uint, u, 32, 4);				\
  MACRO(ARG, q, uint, u, 64, 2)

/* Signed then unsigned, per vector size and for both sizes.  */
#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, ARG)	\
  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, ARG);	\
  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, ARG)

#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, ARG)	\
  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, ARG);	\
  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, ARG)

#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, ARG)	\
  TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, ARG);	\
  TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, ARG)
572 
/* Helpers to call macros with 2 constant and 5 variable arguments:
   each one invokes MACRO(A, B, Q, T1, T2, W, N) once per vector type
   of the named group.  The expansions do not end with a semicolon.  */

/* Signed groups.  */
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, A, B)	\
  MACRO(A, B, , int, s, 8, 8);					\
  MACRO(A, B, , int, s, 16, 4);					\
  MACRO(A, B, , int, s, 32, 2);					\
  MACRO(A, B, , int, s, 64, 1)

#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, A, B)	\
  MACRO(A, B, q, int, s, 8, 16);				\
  MACRO(A, B, q, int, s, 16, 8);				\
  MACRO(A, B, q, int, s, 32, 4);				\
  MACRO(A, B, q, int, s, 64, 2)

#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, A, B)	\
  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, A, B);	\
  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, A, B)

/* Unsigned groups.  */
#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, A, B)	\
  MACRO(A, B, , uint, u, 8, 8);					\
  MACRO(A, B, , uint, u, 16, 4);				\
  MACRO(A, B, , uint, u, 32, 2);				\
  MACRO(A, B, , uint, u, 64, 1)

#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, A, B)	\
  MACRO(A, B, q, uint, u, 8, 16);				\
  MACRO(A, B, q, uint, u, 16, 8);				\
  MACRO(A, B, q, uint, u, 32, 4);				\
  MACRO(A, B, q, uint, u, 64, 2)

/* Signed, unsigned and polynomial types of one vector size; the
   poly64 call goes through MACRO_CRYPTO.  */
#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, A, B)	\
  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, A, B);	\
  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, A, B);	\
  MACRO(A, B, , poly, p, 8, 8);				\
  MACRO(A, B, , poly, p, 16, 4);			\
  MACRO_CRYPTO(MACRO, A, B, , poly, p, 64, 1)

#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, A, B)	\
  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, A, B);	\
  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, A, B);	\
  MACRO(A, B, q, poly, p, 8, 16);			\
  MACRO(A, B, q, poly, p, 16, 8);			\
  MACRO_CRYPTO(MACRO, A, B, q, poly, p, 64, 2)

/* Both vector sizes.  */
#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, A, B)	\
  TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, A, B);		\
  TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, A, B)
620 
621 #endif /* _ARM_NEON_REF_H_ */
622