1 #include <arm_neon.h>
2 #include "arm-neon-ref.h"
3 #include "compute-ref-data.h"
4 
5 /* Expected results.  */
6 
7 /* vld2/chunk 0.  Expected values for the first chunk of the vld2
   test, one array per element type and vector shape.  Going by
   TEST_VLDX below, this is presumably compared against the first
   sizeof(result) bytes stored into result_bis_2.  The hfloat arrays
   are written as hex bit patterns rather than floating-point
   literals (0xc1800000 is the IEEE 754 binary32 encoding of
   -16.0).  */
8 VECT_VAR_DECL(expected_vld2_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
9 					      0xf4, 0xf5, 0xf6, 0xf7 };
10 VECT_VAR_DECL(expected_vld2_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
11 VECT_VAR_DECL(expected_vld2_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
12 VECT_VAR_DECL(expected_vld2_0,int,64,1) [] = { 0xfffffffffffffff0 };
13 VECT_VAR_DECL(expected_vld2_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
14 					       0xf4, 0xf5, 0xf6, 0xf7 };
15 VECT_VAR_DECL(expected_vld2_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
16 VECT_VAR_DECL(expected_vld2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
17 VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
18 VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
19 					       0xf4, 0xf5, 0xf6, 0xf7 };
20 VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
21 VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
22 VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
23 VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
24 					       0xf4, 0xf5, 0xf6, 0xf7,
25 					       0xf8, 0xf9, 0xfa, 0xfb,
26 					       0xfc, 0xfd, 0xfe, 0xff };
27 VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
28 					       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
29 VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
30 					       0xfffffff2, 0xfffffff3 };
31 VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
32 						0xf4, 0xf5, 0xf6, 0xf7,
33 						0xf8, 0xf9, 0xfa, 0xfb,
34 						0xfc, 0xfd, 0xfe, 0xff };
35 VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
36 						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
37 VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
38 						0xfffffff2, 0xfffffff3 };
39 VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
40 						0xf4, 0xf5, 0xf6, 0xf7,
41 						0xf8, 0xf9, 0xfa, 0xfb,
42 						0xfc, 0xfd, 0xfe, 0xff };
43 VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
44 						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
45 VECT_VAR_DECL(expected_vld2_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
46 						  0xca00, 0xc980, 0xc900, 0xc880 };
47 VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
48 						  0xc1600000, 0xc1500000 };
49 
50 /* vld2/chunk 1.  Expected values for the second chunk of the vld2
   test.  Each array carries on the sequence of the matching
   expected_vld2_0 array: int 8x8 goes on from 0xf7 to 0xf8, and
   int 8x16 wraps from 0xff round to 0x0.  Going by TEST_EXTRA_CHUNK
   below, chunk Y is the run of N elements starting at index Y*N of
   result_bis_X.  */
51 VECT_VAR_DECL(expected_vld2_1,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
52 					      0xfc, 0xfd, 0xfe, 0xff };
53 VECT_VAR_DECL(expected_vld2_1,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
54 VECT_VAR_DECL(expected_vld2_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
55 VECT_VAR_DECL(expected_vld2_1,int,64,1) [] = { 0xfffffffffffffff1 };
56 VECT_VAR_DECL(expected_vld2_1,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
57 					       0xfc, 0xfd, 0xfe, 0xff };
58 VECT_VAR_DECL(expected_vld2_1,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
59 VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
60 VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0xfffffffffffffff1 };
61 VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
62 					       0xfc, 0xfd, 0xfe, 0xff };
63 VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
64 VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
65 VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
66 VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
67 					       0x4, 0x5, 0x6, 0x7,
68 					       0x8, 0x9, 0xa, 0xb,
69 					       0xc, 0xd, 0xe, 0xf };
70 VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
71 					       0xfffc, 0xfffd, 0xfffe, 0xffff };
72 VECT_VAR_DECL(expected_vld2_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5,
73 					       0xfffffff6, 0xfffffff7 };
74 VECT_VAR_DECL(expected_vld2_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
75 						0x4, 0x5, 0x6, 0x7,
76 						0x8, 0x9, 0xa, 0xb,
77 						0xc, 0xd, 0xe, 0xf };
78 VECT_VAR_DECL(expected_vld2_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
79 						0xfffc, 0xfffd, 0xfffe, 0xffff };
80 VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5,
81 						0xfffffff6, 0xfffffff7 };
82 VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
83 						0x4, 0x5, 0x6, 0x7,
84 						0x8, 0x9, 0xa, 0xb,
85 						0xc, 0xd, 0xe, 0xf };
86 VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
87 						0xfffc, 0xfffd, 0xfffe, 0xffff };
88 VECT_VAR_DECL(expected_vld2_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500,
89 						  0xc400, 0xc200, 0xc000, 0xbc00 };
90 VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
91 						  0xc1200000, 0xc1100000 };
92 
93 /* vld3/chunk 0.  Expected values for the first chunk of the vld3
   test, one array per element type and vector shape.  The values
   are the same as in the expected_vld2_0 arrays.  */
94 VECT_VAR_DECL(expected_vld3_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
95 					      0xf4, 0xf5, 0xf6, 0xf7 };
96 VECT_VAR_DECL(expected_vld3_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
97 VECT_VAR_DECL(expected_vld3_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
98 VECT_VAR_DECL(expected_vld3_0,int,64,1) [] = { 0xfffffffffffffff0 };
99 VECT_VAR_DECL(expected_vld3_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
100 					       0xf4, 0xf5, 0xf6, 0xf7 };
101 VECT_VAR_DECL(expected_vld3_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
102 VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
103 VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0xfffffffffffffff0 };
104 VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
105 					       0xf4, 0xf5, 0xf6, 0xf7 };
106 VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
107 VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
108 VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
109 VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
110 					       0xf4, 0xf5, 0xf6, 0xf7,
111 					       0xf8, 0xf9, 0xfa, 0xfb,
112 					       0xfc, 0xfd, 0xfe, 0xff };
113 VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
114 					       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
115 VECT_VAR_DECL(expected_vld3_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
116 					       0xfffffff2, 0xfffffff3 };
117 VECT_VAR_DECL(expected_vld3_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
118 						0xf4, 0xf5, 0xf6, 0xf7,
119 						0xf8, 0xf9, 0xfa, 0xfb,
120 						0xfc, 0xfd, 0xfe, 0xff };
121 VECT_VAR_DECL(expected_vld3_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
122 						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
123 VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
124 						0xfffffff2, 0xfffffff3 };
125 VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
126 						0xf4, 0xf5, 0xf6, 0xf7,
127 						0xf8, 0xf9, 0xfa, 0xfb,
128 						0xfc, 0xfd, 0xfe, 0xff };
129 VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
130 						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
131 VECT_VAR_DECL(expected_vld3_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
132 						  0xca00, 0xc980, 0xc900, 0xc880 };
133 VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
134 						  0xc1600000, 0xc1500000 };
135 
136 /* vld3/chunk 1.  Expected values for the second chunk of the vld3
   test.  Each array carries on the sequence of the matching
   expected_vld3_0 array; the values are the same as in the
   expected_vld2_1 arrays.  */
137 VECT_VAR_DECL(expected_vld3_1,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
138 					      0xfc, 0xfd, 0xfe, 0xff };
139 VECT_VAR_DECL(expected_vld3_1,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
140 VECT_VAR_DECL(expected_vld3_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
141 VECT_VAR_DECL(expected_vld3_1,int,64,1) [] = { 0xfffffffffffffff1 };
142 VECT_VAR_DECL(expected_vld3_1,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
143 					       0xfc, 0xfd, 0xfe, 0xff };
144 VECT_VAR_DECL(expected_vld3_1,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
145 VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
146 VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0xfffffffffffffff1 };
147 VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
148 					       0xfc, 0xfd, 0xfe, 0xff };
149 VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
150 VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
151 VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
152 VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
153 					       0x4, 0x5, 0x6, 0x7,
154 					       0x8, 0x9, 0xa, 0xb,
155 					       0xc, 0xd, 0xe, 0xf };
156 VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
157 					       0xfffc, 0xfffd, 0xfffe, 0xffff };
158 VECT_VAR_DECL(expected_vld3_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5,
159 					       0xfffffff6, 0xfffffff7 };
160 VECT_VAR_DECL(expected_vld3_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
161 						0x4, 0x5, 0x6, 0x7,
162 						0x8, 0x9, 0xa, 0xb,
163 						0xc, 0xd, 0xe, 0xf };
164 VECT_VAR_DECL(expected_vld3_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
165 						0xfffc, 0xfffd, 0xfffe, 0xffff };
166 VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5,
167 						0xfffffff6, 0xfffffff7 };
168 VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
169 						0x4, 0x5, 0x6, 0x7,
170 						0x8, 0x9, 0xa, 0xb,
171 						0xc, 0xd, 0xe, 0xf };
172 VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
173 						0xfffc, 0xfffd, 0xfffe, 0xffff };
174 VECT_VAR_DECL(expected_vld3_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500,
175 						  0xc400, 0xc200, 0xc000, 0xbc00 };
176 VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
177 						  0xc1200000, 0xc1100000 };
178 
179 /* vld3/chunk 2.  Expected values for the third (last) chunk of the
   vld3 test.  Each array carries on the sequence of the matching
   expected_vld3_1 array: int 8x8 wraps from 0xff round to 0x0, and
   int 8x16 goes on from 0xf to 0x10.  */
180 VECT_VAR_DECL(expected_vld3_2,int,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
181 					      0x4, 0x5, 0x6, 0x7 };
182 VECT_VAR_DECL(expected_vld3_2,int,16,4) [] = { 0xfff8, 0xfff9,
183 					       0xfffa, 0xfffb };
184 VECT_VAR_DECL(expected_vld3_2,int,32,2) [] = { 0xfffffff4, 0xfffffff5 };
185 VECT_VAR_DECL(expected_vld3_2,int,64,1) [] = { 0xfffffffffffffff2 };
186 VECT_VAR_DECL(expected_vld3_2,uint,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
187 					       0x4, 0x5, 0x6, 0x7 };
188 VECT_VAR_DECL(expected_vld3_2,uint,16,4) [] = { 0xfff8, 0xfff9,
189 						0xfffa, 0xfffb };
190 VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 };
191 VECT_VAR_DECL(expected_vld3_2,uint,64,1) [] = { 0xfffffffffffffff2 };
192 VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
193 					       0x4, 0x5, 0x6, 0x7 };
194 VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff8, 0xfff9,
195 						0xfffa, 0xfffb };
196 VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
197 VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
198 VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
199 					       0x14, 0x15, 0x16, 0x17,
200 					       0x18, 0x19, 0x1a, 0x1b,
201 					       0x1c, 0x1d, 0x1e, 0x1f };
202 VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
203 					       0x4, 0x5, 0x6, 0x7 };
204 VECT_VAR_DECL(expected_vld3_2,int,32,4) [] = { 0xfffffff8, 0xfffffff9,
205 					       0xfffffffa, 0xfffffffb };
206 VECT_VAR_DECL(expected_vld3_2,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
207 						0x14, 0x15, 0x16, 0x17,
208 						0x18, 0x19, 0x1a, 0x1b,
209 						0x1c, 0x1d, 0x1e, 0x1f };
210 VECT_VAR_DECL(expected_vld3_2,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
211 						0x4, 0x5, 0x6, 0x7 };
212 VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0xfffffff8, 0xfffffff9,
213 						0xfffffffa, 0xfffffffb };
214 VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
215 						0x14, 0x15, 0x16, 0x17,
216 						0x18, 0x19, 0x1a, 0x1b,
217 						0x1c, 0x1d, 0x1e, 0x1f };
218 VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
219 						0x4, 0x5, 0x6, 0x7 };
220 VECT_VAR_DECL(expected_vld3_2,hfloat,16,8) [] = { 0x0000, 0x3c00, 0x4000, 0x4200,
221 						  0x4400, 0x4500, 0x4600, 0x4700 };
222 VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000,
223 						  0xc0c00000, 0xc0a00000 };
224 
225 /* vld4/chunk 0.  Expected values for the first chunk of the vld4
   test, one array per element type and vector shape.  The values
   are the same as in the expected_vld2_0 and expected_vld3_0
   arrays.  */
226 VECT_VAR_DECL(expected_vld4_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
227 					      0xf4, 0xf5, 0xf6, 0xf7 };
228 VECT_VAR_DECL(expected_vld4_0,int,16,4) [] = { 0xfff0, 0xfff1,
229 					       0xfff2, 0xfff3 };
230 VECT_VAR_DECL(expected_vld4_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
231 VECT_VAR_DECL(expected_vld4_0,int,64,1) [] = { 0xfffffffffffffff0 };
232 VECT_VAR_DECL(expected_vld4_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
233 					       0xf4, 0xf5, 0xf6, 0xf7 };
234 VECT_VAR_DECL(expected_vld4_0,uint,16,4) [] = { 0xfff0, 0xfff1,
235 						0xfff2, 0xfff3 };
236 VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
237 VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
238 VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
239 					       0xf4, 0xf5, 0xf6, 0xf7 };
240 VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
241 VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
242 VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
243 VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
244 					       0xf4, 0xf5, 0xf6, 0xf7,
245 					       0xf8, 0xf9, 0xfa, 0xfb,
246 					       0xfc, 0xfd, 0xfe, 0xff };
247 VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
248 					       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
249 VECT_VAR_DECL(expected_vld4_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
250 					       0xfffffff2, 0xfffffff3 };
251 VECT_VAR_DECL(expected_vld4_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
252 						0xf4, 0xf5, 0xf6, 0xf7,
253 						0xf8, 0xf9, 0xfa, 0xfb,
254 						0xfc, 0xfd, 0xfe, 0xff };
255 VECT_VAR_DECL(expected_vld4_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
256 						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
257 VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
258 						0xfffffff2, 0xfffffff3 };
259 VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
260 						0xf4, 0xf5, 0xf6, 0xf7,
261 						0xf8, 0xf9, 0xfa, 0xfb,
262 						0xfc, 0xfd, 0xfe, 0xff };
263 VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
264 						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
265 VECT_VAR_DECL(expected_vld4_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
266 						  0xca00, 0xc980, 0xc900, 0xc880 };
267 VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
268 						  0xc1600000, 0xc1500000 };
269 
270 /* vld4/chunk 1.  Expected values for the second chunk of the vld4
   test.  Each array carries on the sequence of the matching
   expected_vld4_0 array; the values are the same as in the
   expected_vld2_1 and expected_vld3_1 arrays.  */
271 VECT_VAR_DECL(expected_vld4_1,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
272 					      0xfc, 0xfd, 0xfe, 0xff };
273 VECT_VAR_DECL(expected_vld4_1,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
274 VECT_VAR_DECL(expected_vld4_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
275 VECT_VAR_DECL(expected_vld4_1,int,64,1) [] = { 0xfffffffffffffff1 };
276 VECT_VAR_DECL(expected_vld4_1,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
277 					       0xfc, 0xfd, 0xfe, 0xff };
278 VECT_VAR_DECL(expected_vld4_1,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
279 VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
280 VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
281 VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
282 					       0xfc, 0xfd, 0xfe, 0xff };
283 VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
284 VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
285 VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
286 VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
287 					       0x4, 0x5, 0x6, 0x7,
288 					       0x8, 0x9, 0xa, 0xb,
289 					       0xc, 0xd, 0xe, 0xf };
290 VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
291 					       0xfffc, 0xfffd, 0xfffe, 0xffff };
292 VECT_VAR_DECL(expected_vld4_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5,
293 					       0xfffffff6, 0xfffffff7 };
294 VECT_VAR_DECL(expected_vld4_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
295 						0x4, 0x5, 0x6, 0x7,
296 						0x8, 0x9, 0xa, 0xb,
297 						0xc, 0xd, 0xe, 0xf };
298 VECT_VAR_DECL(expected_vld4_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
299 						0xfffc, 0xfffd, 0xfffe, 0xffff };
300 VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5,
301 						0xfffffff6, 0xfffffff7 };
302 VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
303 						0x4, 0x5, 0x6, 0x7,
304 						0x8, 0x9, 0xa, 0xb,
305 						0xc, 0xd, 0xe, 0xf };
306 VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
307 						0xfffc, 0xfffd, 0xfffe, 0xffff };
308 VECT_VAR_DECL(expected_vld4_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500,
309 						  0xc400, 0xc200, 0xc000, 0xbc00 };
310 VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
311 						  0xc1200000, 0xc1100000 };
312 
313 /* vld4/chunk 2.  Expected values for the third chunk of the vld4
   test.  Each array carries on the sequence of the matching
   expected_vld4_1 array; the values are the same as in the
   expected_vld3_2 arrays.  */
314 VECT_VAR_DECL(expected_vld4_2,int,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
315 					      0x4, 0x5, 0x6, 0x7 };
316 VECT_VAR_DECL(expected_vld4_2,int,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
317 VECT_VAR_DECL(expected_vld4_2,int,32,2) [] = { 0xfffffff4, 0xfffffff5 };
318 VECT_VAR_DECL(expected_vld4_2,int,64,1) [] = { 0xfffffffffffffff2 };
319 VECT_VAR_DECL(expected_vld4_2,uint,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
320 					       0x4, 0x5, 0x6, 0x7 };
321 VECT_VAR_DECL(expected_vld4_2,uint,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
322 VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 };
323 VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
324 VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
325 					       0x4, 0x5, 0x6, 0x7 };
326 VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
327 VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
328 VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
329 VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
330 					       0x14, 0x15, 0x16, 0x17,
331 					       0x18, 0x19, 0x1a, 0x1b,
332 					       0x1c, 0x1d, 0x1e, 0x1f };
333 VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
334 					       0x4, 0x5, 0x6, 0x7 };
335 VECT_VAR_DECL(expected_vld4_2,int,32,4) [] = { 0xfffffff8, 0xfffffff9,
336 					       0xfffffffa, 0xfffffffb };
337 VECT_VAR_DECL(expected_vld4_2,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
338 						0x14, 0x15, 0x16, 0x17,
339 						0x18, 0x19, 0x1a, 0x1b,
340 						0x1c, 0x1d, 0x1e, 0x1f };
341 VECT_VAR_DECL(expected_vld4_2,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
342 						0x4, 0x5, 0x6, 0x7 };
343 VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0xfffffff8, 0xfffffff9,
344 						0xfffffffa, 0xfffffffb };
345 VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
346 						0x14, 0x15, 0x16, 0x17,
347 						0x18, 0x19, 0x1a, 0x1b,
348 						0x1c, 0x1d, 0x1e, 0x1f };
349 VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
350 						0x4, 0x5, 0x6, 0x7 };
351 VECT_VAR_DECL(expected_vld4_2,hfloat,16,8) [] = { 0x0000, 0x3c00, 0x4000, 0x4200,
352 						  0x4400, 0x4500, 0x4600, 0x4700 };
353 VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000,
354 						  0xc0c00000, 0xc0a00000 };
355 
356 /* vld4/chunk 3.  Expected values for the fourth (last) chunk of the
   vld4 test.  Each array carries on the sequence of the matching
   expected_vld4_2 array: int 8x8 goes on from 0x7 to 0x8, and
   int 8x16 goes on from 0x1f to 0x20.  */
357 VECT_VAR_DECL(expected_vld4_3,int,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
358 					      0xc, 0xd, 0xe, 0xf };
359 VECT_VAR_DECL(expected_vld4_3,int,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
360 VECT_VAR_DECL(expected_vld4_3,int,32,2) [] = { 0xfffffff6, 0xfffffff7 };
361 VECT_VAR_DECL(expected_vld4_3,int,64,1) [] = { 0xfffffffffffffff3 };
362 VECT_VAR_DECL(expected_vld4_3,uint,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
363 					       0xc, 0xd, 0xe, 0xf };
364 VECT_VAR_DECL(expected_vld4_3,uint,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
365 VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff6, 0xfffffff7 };
366 VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
367 VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
368 					       0xc, 0xd, 0xe, 0xf };
369 VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
370 VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xc400, 0xc200, 0xc000, 0xbc00 };
371 VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1200000, 0xc1100000 };
372 VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
373 					       0x24, 0x25, 0x26, 0x27,
374 					       0x28, 0x29, 0x2a, 0x2b,
375 					       0x2c, 0x2d, 0x2e, 0x2f };
376 VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
377 					       0xc, 0xd, 0xe, 0xf };
378 VECT_VAR_DECL(expected_vld4_3,int,32,4) [] = { 0xfffffffc, 0xfffffffd,
379 					       0xfffffffe, 0xffffffff };
380 VECT_VAR_DECL(expected_vld4_3,uint,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
381 						0x24, 0x25, 0x26, 0x27,
382 						0x28, 0x29, 0x2a, 0x2b,
383 						0x2c, 0x2d, 0x2e, 0x2f };
384 VECT_VAR_DECL(expected_vld4_3,uint,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
385 						0xc, 0xd, 0xe, 0xf };
386 VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0xfffffffc, 0xfffffffd,
387 						0xfffffffe, 0xffffffff };
388 VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
389 						0x24, 0x25, 0x26, 0x27,
390 						0x28, 0x29, 0x2a, 0x2b,
391 						0x2c, 0x2d, 0x2e, 0x2f };
392 VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
393 						0xc, 0xd, 0xe, 0xf };
394 VECT_VAR_DECL(expected_vld4_3,hfloat,16,8) [] = { 0x4800, 0x4880, 0x4900, 0x4980,
395 						  0x4a00, 0x4a80, 0x4b00, 0x4b80 };
396 VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0xc0800000, 0xc0400000,
397 						  0xc0000000, 0xbf800000 };
398 
exec_vldX(void)399 void exec_vldX (void)
400 {
401   /* In this case, input variables are arrays of vectors.
     DECL_VLDX(T1, W, N, X) declares two objects for element type
     T1, element width W and lane count N: "vector", an array of X
     vectors, and a result_bis_X scratch buffer of X * N elements.
     The expansion has no trailing semicolon; the caller supplies
     it.  */
402 #define DECL_VLDX(T1, W, N, X)						\
403   VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X);	\
404   VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
405 
406   /* We need to use a temporary result buffer (result_bis), because
     the one used for other tests is not large enough.  A subset of
     the result data is copied from result_bis to result, and it is
     this subset which is used to check the actual behavior.
     TEST_EXTRA_CHUNK below copies another chunk of data from
     result_bis to result.

     TEST_VLDX(Q, T1, T2, W, N, X) loads X vectors from the
     buffer_vldX input with vld<X><Q>_<T2><W>, stores them into
     result_bis_X with the matching vst<X><Q>_<T2><W>, and then
     copies the first sizeof(result) bytes of result_bis_X into
     result.  Q is the intrinsic-name infix: the callers below pass
     nothing for the 64-bit shapes and q for the 128-bit ones.  T2 is
     the one-letter type suffix (s, u, p or f).  The expansion
     already ends with a semicolon.  */
412 #define TEST_VLDX(Q, T1, T2, W, N, X)					\
413   VECT_ARRAY_VAR(vector, T1, W, N, X) =					\
414     /* Use dedicated init buffer, of size X */				\
415     vld##X##Q##_##T2##W(VECT_ARRAY_VAR(buffer_vld##X, T1, W, N, X));	\
416   vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N),		\
417 		      VECT_ARRAY_VAR(vector, T1, W, N, X));		\
418   memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
419 	 sizeof(VECT_VAR(result, T1, W, N)));
420 
421   /* Overwrite "result" with chunk Y of "result_bis_X": sizeof(result)
     bytes are copied, starting at element index Y*N of the scratch
     buffer.  The expansion already ends with a semicolon.  */
422 #define TEST_EXTRA_CHUNK(T1, W, N, X,Y)			\
423   memcpy(VECT_VAR(result, T1, W, N),			\
424 	 &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),	\
425 	 sizeof(VECT_VAR(result, T1, W, N)));
426 
427   /* We need all variants in 64 bits, but there is no 64x2 variant.
     DECL_ALL_VLDX_NO_FP16(X) applies DECL_VLDX to every shape
     covered by the test except the float16 ones: int/uint
     8x8, 16x4, 32x2 and 64x1, poly 8x8 and 16x4, float 32x2, then
     the 128-bit shapes int/uint 8x16, 16x8 and 32x4, poly 8x16 and
     16x8, and float 32x4.  */
428 #define DECL_ALL_VLDX_NO_FP16(X)		\
429   DECL_VLDX(int, 8, 8, X);			\
430   DECL_VLDX(int, 16, 4, X);			\
431   DECL_VLDX(int, 32, 2, X);			\
432   DECL_VLDX(int, 64, 1, X);			\
433   DECL_VLDX(uint, 8, 8, X);			\
434   DECL_VLDX(uint, 16, 4, X);			\
435   DECL_VLDX(uint, 32, 2, X);			\
436   DECL_VLDX(uint, 64, 1, X);			\
437   DECL_VLDX(poly, 8, 8, X);			\
438   DECL_VLDX(poly, 16, 4, X);			\
439   DECL_VLDX(float, 32, 2, X);			\
440   DECL_VLDX(int, 8, 16, X);			\
441   DECL_VLDX(int, 16, 8, X);			\
442   DECL_VLDX(int, 32, 4, X);			\
443   DECL_VLDX(uint, 8, 16, X);			\
444   DECL_VLDX(uint, 16, 8, X);			\
445   DECL_VLDX(uint, 32, 4, X);			\
446   DECL_VLDX(poly, 8, 16, X);			\
447   DECL_VLDX(poly, 16, 8, X);			\
448   DECL_VLDX(float, 32, 4, X)
449 
/* DECL_ALL_VLDX(X): declarations for every tested shape.  When one of
   the __ARM_FP16_FORMAT_* macros is defined, the float 16x4 and 16x8
   shapes are declared in addition to the DECL_ALL_VLDX_NO_FP16 set;
   otherwise only that set is declared.  */
450 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
451 #define DECL_ALL_VLDX(X)	\
452   DECL_ALL_VLDX_NO_FP16(X);	\
453   DECL_VLDX(float, 16, 4, X);	\
454   DECL_VLDX(float, 16, 8, X)
455 #else
456 #define DECL_ALL_VLDX(X) DECL_ALL_VLDX_NO_FP16(X)
457 #endif
458 
/* TEST_ALL_VLDX_NO_FP16(X): run TEST_VLDX for every non-float16
   shape.  The 64-bit shapes pass an empty Q argument and the 128-bit
   shapes pass q, which selects the vld<X>/vst<X> or vld<X>q/vst<X>q
   intrinsic names respectively.  */
459 #define TEST_ALL_VLDX_NO_FP16(X)		\
460   TEST_VLDX(, int, s, 8, 8, X);			\
461   TEST_VLDX(, int, s, 16, 4, X);		\
462   TEST_VLDX(, int, s, 32, 2, X);		\
463   TEST_VLDX(, int, s, 64, 1, X);		\
464   TEST_VLDX(, uint, u, 8, 8, X);		\
465   TEST_VLDX(, uint, u, 16, 4, X);		\
466   TEST_VLDX(, uint, u, 32, 2, X);		\
467   TEST_VLDX(, uint, u, 64, 1, X);		\
468   TEST_VLDX(, poly, p, 8, 8, X);		\
469   TEST_VLDX(, poly, p, 16, 4, X);		\
470   TEST_VLDX(, float, f, 32, 2, X);		\
471   TEST_VLDX(q, int, s, 8, 16, X);		\
472   TEST_VLDX(q, int, s, 16, 8, X);		\
473   TEST_VLDX(q, int, s, 32, 4, X);		\
474   TEST_VLDX(q, uint, u, 8, 16, X);		\
475   TEST_VLDX(q, uint, u, 16, 8, X);		\
476   TEST_VLDX(q, uint, u, 32, 4, X);		\
477   TEST_VLDX(q, poly, p, 8, 16, X);		\
478   TEST_VLDX(q, poly, p, 16, 8, X);		\
479   TEST_VLDX(q, float, f, 32, 4, X)
480 
/* TEST_ALL_VLDX(X): run TEST_VLDX for every tested shape.  The
   float 16x4 and 16x8 shapes are included only when one of the
   __ARM_FP16_FORMAT_* macros is defined.  */
481 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
482 #define TEST_ALL_VLDX(X)		\
483   TEST_ALL_VLDX_NO_FP16(X);		\
484   TEST_VLDX(, float, f, 16, 4, X);	\
485   TEST_VLDX(q, float, f, 16, 8, X)
486 #else
487 #define TEST_ALL_VLDX(X) TEST_ALL_VLDX_NO_FP16(X)
488 #endif
489 
/* TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y): for every non-float16 shape,
   copy chunk Y of the matching result_bis_X buffer into result by
   way of TEST_EXTRA_CHUNK.  */
490 #define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y)	\
491   TEST_EXTRA_CHUNK(int, 8, 8, X, Y);		\
492   TEST_EXTRA_CHUNK(int, 16, 4, X, Y);		\
493   TEST_EXTRA_CHUNK(int, 32, 2, X, Y);		\
494   TEST_EXTRA_CHUNK(int, 64, 1, X, Y);		\
495   TEST_EXTRA_CHUNK(uint, 8, 8, X, Y);		\
496   TEST_EXTRA_CHUNK(uint, 16, 4, X, Y);		\
497   TEST_EXTRA_CHUNK(uint, 32, 2, X, Y);		\
498   TEST_EXTRA_CHUNK(uint, 64, 1, X, Y);		\
499   TEST_EXTRA_CHUNK(poly, 8, 8, X, Y);		\
500   TEST_EXTRA_CHUNK(poly, 16, 4, X, Y);		\
501   TEST_EXTRA_CHUNK(float, 32, 2, X, Y);		\
502   TEST_EXTRA_CHUNK(int, 8, 16, X, Y);		\
503   TEST_EXTRA_CHUNK(int, 16, 8, X, Y);		\
504   TEST_EXTRA_CHUNK(int, 32, 4, X, Y);		\
505   TEST_EXTRA_CHUNK(uint, 8, 16, X, Y);		\
506   TEST_EXTRA_CHUNK(uint, 16, 8, X, Y);		\
507   TEST_EXTRA_CHUNK(uint, 32, 4, X, Y);		\
508   TEST_EXTRA_CHUNK(poly, 8, 16, X, Y);		\
509   TEST_EXTRA_CHUNK(poly, 16, 8, X, Y);		\
510   TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
511 
/* TEST_ALL_EXTRA_CHUNKS(X, Y): copy chunk Y into result for every
   tested shape, adding float 16x4 and 16x8 when one of the
   __ARM_FP16_FORMAT_* macros is defined.  No ';' follows
   TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y) in the first variant: it is not
   needed because the TEST_EXTRA_CHUNK expansion it ends with already
   carries its own semicolon.  */
512 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
513 #define TEST_ALL_EXTRA_CHUNKS(X, Y)		\
514   TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y)		\
515   TEST_EXTRA_CHUNK(float, 16, 4, X, Y);		\
516   TEST_EXTRA_CHUNK(float, 16, 8, X, Y);
517 #else
518 #define TEST_ALL_EXTRA_CHUNKS(X, Y) TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y)
519 #endif
520 
521   /* vldX supports all vector types except [u]int64x2.
     CHECK_RESULTS_VLDX_NO_FP16(test_name, EXPECTED, comment) issues
     one check per non-float16 shape, forwarding test_name, EXPECTED
     and comment unchanged: CHECK for the int/uint shapes, CHECK_POLY
     for the poly shapes and CHECK_FP for the float32 shapes.  The
     expansion has no trailing semicolon.  */
522 #define CHECK_RESULTS_VLDX_NO_FP16(test_name,EXPECTED,comment)		\
523     CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment);		\
524     CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment);		\
525     CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment);		\
526     CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment);		\
527     CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment);		\
528     CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment);		\
529     CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment);		\
530     CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment);		\
531     CHECK_POLY(test_name, poly, 8, 8, PRIx8, EXPECTED, comment);	\
532     CHECK_POLY(test_name, poly, 16, 4, PRIx16, EXPECTED, comment);	\
533     CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment);	\
534 									\
535     CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment);		\
536     CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment);		\
537     CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment);		\
538     CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment);		\
539     CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment);		\
540     CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment);		\
541     CHECK_POLY(test_name, poly, 8, 16, PRIx8, EXPECTED, comment);	\
542     CHECK_POLY(test_name, poly, 16, 8, PRIx16, EXPECTED, comment);	\
543     CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment)
544 
/* CHECK_RESULTS_VLDX(test_name, EXPECTED, comment): check result
   against EXPECTED for every tested shape.  Both variants wrap the
   checks in a brace-enclosed block; the first one adds CHECK_FP
   checks for float 16x4 and 16x8 when one of the
   __ARM_FP16_FORMAT_* macros is defined.  */
545 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
546 #define CHECK_RESULTS_VLDX(test_name,EXPECTED,comment)			\
547   {									\
548     CHECK_RESULTS_VLDX_NO_FP16(test_name, EXPECTED, comment);		\
549     CHECK_FP(test_name, float, 16, 4, PRIx16, EXPECTED, comment);	\
550     CHECK_FP(test_name, float, 16, 8, PRIx16, EXPECTED, comment);	\
551   }
552 #else
553 #define CHECK_RESULTS_VLDX(test_name, EXPECTED, comment)		\
554   { CHECK_RESULTS_VLDX_NO_FP16(test_name, EXPECTED, comment); }
555 #endif
556 
557   DECL_ALL_VLDX(2);
558   DECL_ALL_VLDX(3);
559   DECL_ALL_VLDX(4);
560 
561   /* Special input buffers of suitable size are needed for vld2/vld3/vld4.  */
562   /* Input buffers for vld2, 1 of each size */
563   VECT_ARRAY_INIT2(buffer_vld2, int, 8, 8);
564   PAD(buffer_vld2_pad, int, 8, 8);
565   VECT_ARRAY_INIT2(buffer_vld2, int, 16, 4);
566   PAD(buffer_vld2_pad, int, 16, 4);
567   VECT_ARRAY_INIT2(buffer_vld2, int, 32, 2);
568   PAD(buffer_vld2_pad, int, 32, 2);
569   VECT_ARRAY_INIT2(buffer_vld2, int, 64, 1);
570   PAD(buffer_vld2_pad, int, 64, 1);
571   VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 8);
572   PAD(buffer_vld2_pad, uint, 8, 8);
573   VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 4);
574   PAD(buffer_vld2_pad, uint, 16, 4);
575   VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 2);
576   PAD(buffer_vld2_pad, uint, 32, 2);
577   VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 1);
578   PAD(buffer_vld2_pad, uint, 64, 1);
579   VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 8);
580   PAD(buffer_vld2_pad, poly, 8, 8);
581   VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
582   PAD(buffer_vld2_pad, poly, 16, 4);
583 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
584   VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
585   PAD(buffer_vld2_pad, float, 16, 4);
586 #endif
587   VECT_ARRAY_INIT2(buffer_vld2, float, 32, 2);
588   PAD(buffer_vld2_pad, float, 32, 2);
589 
590   VECT_ARRAY_INIT2(buffer_vld2, int, 8, 16);
591   PAD(buffer_vld2_pad, int, 8, 16);
592   VECT_ARRAY_INIT2(buffer_vld2, int, 16, 8);
593   PAD(buffer_vld2_pad, int, 16, 8);
594   VECT_ARRAY_INIT2(buffer_vld2, int, 32, 4);
595   PAD(buffer_vld2_pad, int, 32, 4);
596   VECT_ARRAY_INIT2(buffer_vld2, int, 64, 2);
597   PAD(buffer_vld2_pad, int, 64, 2);
598   VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 16);
599   PAD(buffer_vld2_pad, uint, 8, 16);
600   VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 8);
601   PAD(buffer_vld2_pad, uint, 16, 8);
602   VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 4);
603   PAD(buffer_vld2_pad, uint, 32, 4);
604   VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 2);
605   PAD(buffer_vld2_pad, uint, 64, 2);
606   VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 16);
607   PAD(buffer_vld2_pad, poly, 8, 16);
608   VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
609   PAD(buffer_vld2_pad, poly, 16, 8);
610 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
611   VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
612   PAD(buffer_vld2_pad, float, 16, 8);
613 #endif
614   VECT_ARRAY_INIT2(buffer_vld2, float, 32, 4);
615   PAD(buffer_vld2_pad, float, 32, 4);
616 
617   /* Input buffers for vld3, 1 of each size */
618   VECT_ARRAY_INIT3(buffer_vld3, int, 8, 8);
619   PAD(buffer_vld3_pad, int, 8, 8);
620   VECT_ARRAY_INIT3(buffer_vld3, int, 16, 4);
621   PAD(buffer_vld3_pad, int, 16, 4);
622   VECT_ARRAY_INIT3(buffer_vld3, int, 32, 2);
623   PAD(buffer_vld3_pad, int, 32, 2);
624   VECT_ARRAY_INIT3(buffer_vld3, int, 64, 1);
625   PAD(buffer_vld3_pad, int, 64, 1);
626   VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 8);
627   PAD(buffer_vld3_pad, uint, 8, 8);
628   VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 4);
629   PAD(buffer_vld3_pad, uint, 16, 4);
630   VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 2);
631   PAD(buffer_vld3_pad, uint, 32, 2);
632   VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 1);
633   PAD(buffer_vld3_pad, uint, 64, 1);
634   VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 8);
635   PAD(buffer_vld3_pad, poly, 8, 8);
636   VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
637   PAD(buffer_vld3_pad, poly, 16, 4);
638 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
639   VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
640   PAD(buffer_vld3_pad, float, 16, 4);
641 #endif
642   VECT_ARRAY_INIT3(buffer_vld3, float, 32, 2);
643   PAD(buffer_vld3_pad, float, 32, 2);
644 
645   VECT_ARRAY_INIT3(buffer_vld3, int, 8, 16);
646   PAD(buffer_vld3_pad, int, 8, 16);
647   VECT_ARRAY_INIT3(buffer_vld3, int, 16, 8);
648   PAD(buffer_vld3_pad, int, 16, 8);
649   VECT_ARRAY_INIT3(buffer_vld3, int, 32, 4);
650   PAD(buffer_vld3_pad, int, 32, 4);
651   VECT_ARRAY_INIT3(buffer_vld3, int, 64, 2);
652   PAD(buffer_vld3_pad, int, 64, 2);
653   VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 16);
654   PAD(buffer_vld3_pad, uint, 8, 16);
655   VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 8);
656   PAD(buffer_vld3_pad, uint, 16, 8);
657   VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 4);
658   PAD(buffer_vld3_pad, uint, 32, 4);
659   VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 2);
660   PAD(buffer_vld3_pad, uint, 64, 2);
661   VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 16);
662   PAD(buffer_vld3_pad, poly, 8, 16);
663   VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
664   PAD(buffer_vld3_pad, poly, 16, 8);
665 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
666   VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
667   PAD(buffer_vld3_pad, float, 16, 8);
668 #endif
669   VECT_ARRAY_INIT3(buffer_vld3, float, 32, 4);
670   PAD(buffer_vld3_pad, float, 32, 4);
671 
672   /* Input buffers for vld4, 1 of each size */
673   VECT_ARRAY_INIT4(buffer_vld4, int, 8, 8);
674   PAD(buffer_vld4_pad, int, 8, 8);
675   VECT_ARRAY_INIT4(buffer_vld4, int, 16, 4);
676   PAD(buffer_vld4_pad, int, 16, 4);
677   VECT_ARRAY_INIT4(buffer_vld4, int, 32, 2);
678   PAD(buffer_vld4_pad, int, 32, 2);
679   VECT_ARRAY_INIT4(buffer_vld4, int, 64, 1);
680   PAD(buffer_vld4_pad, int, 64, 1);
681   VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 8);
682   PAD(buffer_vld4_pad, uint, 8, 8);
683   VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 4);
684   PAD(buffer_vld4_pad, uint, 16, 4);
685   VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 2);
686   PAD(buffer_vld4_pad, uint, 32, 2);
687   VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 1);
688   PAD(buffer_vld4_pad, uint, 64, 1);
689   VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 8);
690   PAD(buffer_vld4_pad, poly, 8, 8);
691   VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
692   PAD(buffer_vld4_pad, poly, 16, 4);
693 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
694   VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
695   PAD(buffer_vld4_pad, float, 16, 4);
696 #endif
697   VECT_ARRAY_INIT4(buffer_vld4, float, 32, 2);
698   PAD(buffer_vld4_pad, float, 32, 2);
699 
700   VECT_ARRAY_INIT4(buffer_vld4, int, 8, 16);
701   PAD(buffer_vld4_pad, int, 8, 16);
702   VECT_ARRAY_INIT4(buffer_vld4, int, 16, 8);
703   PAD(buffer_vld4_pad, int, 16, 8);
704   VECT_ARRAY_INIT4(buffer_vld4, int, 32, 4);
705   PAD(buffer_vld4_pad, int, 32, 4);
706   VECT_ARRAY_INIT4(buffer_vld4, int, 64, 2);
707   PAD(buffer_vld4_pad, int, 64, 2);
708   VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 16);
709   PAD(buffer_vld4_pad, uint, 8, 16);
710   VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 8);
711   PAD(buffer_vld4_pad, uint, 16, 8);
712   VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 4);
713   PAD(buffer_vld4_pad, uint, 32, 4);
714   VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 2);
715   PAD(buffer_vld4_pad, uint, 64, 2);
716   VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 16);
717   PAD(buffer_vld4_pad, poly, 8, 16);
718   VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
719   PAD(buffer_vld4_pad, poly, 16, 8);
720 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
721   VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
722   PAD(buffer_vld4_pad, float, 16, 8);
723 #endif
724   VECT_ARRAY_INIT4(buffer_vld4, float, 32, 4);
725   PAD(buffer_vld4_pad, float, 32, 4);
726 
727   /* Check vld2/vld2q.  */
728   clean_results ();
729 #define TEST_MSG "VLD2/VLD2Q"
730   TEST_ALL_VLDX(2);
731   CHECK_RESULTS_VLDX (TEST_MSG, expected_vld2_0, "chunk 0");
732 
733   TEST_ALL_EXTRA_CHUNKS(2, 1);
734   CHECK_RESULTS_VLDX (TEST_MSG, expected_vld2_1, "chunk 1");
735 
736   /* Check vld3/vld3q.  */
737   clean_results ();
738 #undef TEST_MSG
739 #define TEST_MSG "VLD3/VLD3Q"
740   TEST_ALL_VLDX(3);
741   CHECK_RESULTS_VLDX (TEST_MSG, expected_vld3_0, "chunk 0");
742 
743   TEST_ALL_EXTRA_CHUNKS(3, 1);
744   CHECK_RESULTS_VLDX (TEST_MSG, expected_vld3_1, "chunk 1");
745 
746   TEST_ALL_EXTRA_CHUNKS(3, 2);
747   CHECK_RESULTS_VLDX (TEST_MSG, expected_vld3_2, "chunk 2");
748 
749   /* Check vld4/vld4q.  */
750   clean_results ();
751 #undef TEST_MSG
752 #define TEST_MSG "VLD4/VLD4Q"
753   TEST_ALL_VLDX(4);
754   CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_0, "chunk 0");
755 
756   TEST_ALL_EXTRA_CHUNKS(4, 1);
757   CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_1, "chunk 1");
758 
759   TEST_ALL_EXTRA_CHUNKS(4, 2);
760   CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_2, "chunk 2");
761 
762   TEST_ALL_EXTRA_CHUNKS(4, 3);
763   CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_3, "chunk 3");
764 }
765 
/* Program entry point: invoke the vldX test routine once, then return 0.  */
int
main (void)
{
  exec_vldX ();

  return 0;
}
771