1 #include <arm_neon.h>
2 #include "arm-neon-ref.h"
3 #include "compute-ref-data.h"
4 
5 /* Expected values of cumulative_saturation flag with input=0.  */
6 int VECT_VAR(expected_cumulative_sat_0,int,8,8) = 0;
7 int VECT_VAR(expected_cumulative_sat_0,int,16,4) = 0;
8 int VECT_VAR(expected_cumulative_sat_0,int,32,2) = 0;
9 int VECT_VAR(expected_cumulative_sat_0,int,64,1) = 0;
10 int VECT_VAR(expected_cumulative_sat_0,uint,8,8) = 0;
11 int VECT_VAR(expected_cumulative_sat_0,uint,16,4) = 0;
12 int VECT_VAR(expected_cumulative_sat_0,uint,32,2) = 0;
13 int VECT_VAR(expected_cumulative_sat_0,uint,64,1) = 0;
14 int VECT_VAR(expected_cumulative_sat_0,int,8,16) = 0;
15 int VECT_VAR(expected_cumulative_sat_0,int,16,8) = 0;
16 int VECT_VAR(expected_cumulative_sat_0,int,32,4) = 0;
17 int VECT_VAR(expected_cumulative_sat_0,int,64,2) = 0;
18 int VECT_VAR(expected_cumulative_sat_0,uint,8,16) = 0;
19 int VECT_VAR(expected_cumulative_sat_0,uint,16,8) = 0;
20 int VECT_VAR(expected_cumulative_sat_0,uint,32,4) = 0;
21 int VECT_VAR(expected_cumulative_sat_0,uint,64,2) = 0;
22 
23 /* Expected results with input=0.  */
24 VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
25 					 0x0, 0x0, 0x0, 0x0 };
26 VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
27 VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 };
28 VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 };
29 VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
30 					  0x0, 0x0, 0x0, 0x0 };
31 VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
32 VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 };
33 VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 };
34 VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
35 					  0x0, 0x0, 0x0, 0x0,
36 					  0x0, 0x0, 0x0, 0x0,
37 					  0x0, 0x0, 0x0, 0x0 };
38 VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
39 					  0x0, 0x0, 0x0, 0x0 };
40 VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
41 VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 };
42 VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
43 					   0x0, 0x0, 0x0, 0x0,
44 					   0x0, 0x0, 0x0, 0x0,
45 					   0x0, 0x0, 0x0, 0x0 };
46 VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
47 					   0x0, 0x0, 0x0, 0x0 };
48 VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
49 VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 };
50 
51 /* Expected values of cumulative_saturation flag with input=0 and
52    negative shift amount.  */
53 int VECT_VAR(expected_cumulative_sat_0_neg,int,8,8) = 0;
54 int VECT_VAR(expected_cumulative_sat_0_neg,int,16,4) = 0;
55 int VECT_VAR(expected_cumulative_sat_0_neg,int,32,2) = 0;
56 int VECT_VAR(expected_cumulative_sat_0_neg,int,64,1) = 0;
57 int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,8) = 0;
58 int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,4) = 0;
59 int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,2) = 0;
60 int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,1) = 0;
61 int VECT_VAR(expected_cumulative_sat_0_neg,int,8,16) = 0;
62 int VECT_VAR(expected_cumulative_sat_0_neg,int,16,8) = 0;
63 int VECT_VAR(expected_cumulative_sat_0_neg,int,32,4) = 0;
64 int VECT_VAR(expected_cumulative_sat_0_neg,int,64,2) = 0;
65 int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,16) = 0;
66 int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,8) = 0;
67 int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,4) = 0;
68 int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,2) = 0;
69 
70 /* Expected results with input=0 and negative shift amount.  */
71 VECT_VAR_DECL(expected_0_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
72 					     0x0, 0x0, 0x0, 0x0 };
73 VECT_VAR_DECL(expected_0_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
74 VECT_VAR_DECL(expected_0_neg,int,32,2) [] = { 0x0, 0x0 };
75 VECT_VAR_DECL(expected_0_neg,int,64,1) [] = { 0x0 };
76 VECT_VAR_DECL(expected_0_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
77 					      0x0, 0x0, 0x0, 0x0 };
78 VECT_VAR_DECL(expected_0_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
79 VECT_VAR_DECL(expected_0_neg,uint,32,2) [] = { 0x0, 0x0 };
80 VECT_VAR_DECL(expected_0_neg,uint,64,1) [] = { 0x0 };
81 VECT_VAR_DECL(expected_0_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
82 					      0x0, 0x0, 0x0, 0x0,
83 					      0x0, 0x0, 0x0, 0x0,
84 					      0x0, 0x0, 0x0, 0x0 };
85 VECT_VAR_DECL(expected_0_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
86 					      0x0, 0x0, 0x0, 0x0 };
87 VECT_VAR_DECL(expected_0_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
88 VECT_VAR_DECL(expected_0_neg,int,64,2) [] = { 0x0, 0x0 };
89 VECT_VAR_DECL(expected_0_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
90 					       0x0, 0x0, 0x0, 0x0,
91 					       0x0, 0x0, 0x0, 0x0,
92 					       0x0, 0x0, 0x0, 0x0 };
93 VECT_VAR_DECL(expected_0_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
94 					       0x0, 0x0, 0x0, 0x0 };
95 VECT_VAR_DECL(expected_0_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
96 VECT_VAR_DECL(expected_0_neg,uint,64,2) [] = { 0x0, 0x0 };
97 
98 /* Expected values of cumulative_saturation flag.  */
99 int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
100 int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
101 int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
102 int VECT_VAR(expected_cumulative_sat,int,64,1) = 0;
103 int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1;
104 int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1;
105 int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1;
106 int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0;
107 int VECT_VAR(expected_cumulative_sat,int,8,16) = 1;
108 int VECT_VAR(expected_cumulative_sat,int,16,8) = 1;
109 int VECT_VAR(expected_cumulative_sat,int,32,4) = 1;
110 int VECT_VAR(expected_cumulative_sat,int,64,2) = 1;
111 int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1;
112 int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1;
113 int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1;
114 int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1;
115 
116 /* Expected results.  */
117 VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6,
118 				       0xe8, 0xea, 0xec, 0xee };
119 VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 };
120 VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 };
121 VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffe };
122 VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
123 					0xff, 0xff, 0xff, 0xff };
124 VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
125 VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
126 VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffffffffffe };
127 VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
128 					0x80, 0x80, 0x80, 0x80,
129 					0x80, 0x80, 0x80, 0x80,
130 					0x80, 0x80, 0x80, 0x80 };
131 VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000,
132 					0x8000, 0x8000, 0x8000, 0x8000 };
133 VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000,
134 					0x80000000, 0x80000000 };
135 VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000,
136 					0x8000000000000000 };
137 VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
138 					 0xff, 0xff, 0xff, 0xff,
139 					 0xff, 0xff, 0xff, 0xff,
140 					 0xff, 0xff, 0xff, 0xff };
141 VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
142 					 0xffff, 0xffff, 0xffff, 0xffff };
143 VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
144 					 0xffffffff, 0xffffffff };
145 VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff,
146 					 0xffffffffffffffff };
147 
148 /* Expected values of cumulative_saturation flag with negative shift
149    amount.  */
150 int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 0;
151 int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 0;
152 int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 0;
153 int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 0;
154 int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 0;
155 int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 0;
156 int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 0;
157 int VECT_VAR(expected_cumulative_sat_neg,uint,64,1) = 0;
158 int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 0;
159 int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0;
160 int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0;
161 int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 0;
162 int VECT_VAR(expected_cumulative_sat_neg,uint,8,16) = 0;
163 int VECT_VAR(expected_cumulative_sat_neg,uint,16,8) = 0;
164 int VECT_VAR(expected_cumulative_sat_neg,uint,32,4) = 0;
165 int VECT_VAR(expected_cumulative_sat_neg,uint,64,2) = 0;
166 
167 /* Expected results with negative shift amount.  */
168 VECT_VAR_DECL(expected_neg,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9,
169 					   0xfa, 0xfa, 0xfb, 0xfb };
170 VECT_VAR_DECL(expected_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffc };
171 VECT_VAR_DECL(expected_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe };
172 VECT_VAR_DECL(expected_neg,int,64,1) [] = { 0xffffffffffffffff };
173 VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79,
174 					    0x7a, 0x7a, 0x7b, 0x7b };
175 VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc };
176 VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe };
177 VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0xfffffffffffffff };
178 VECT_VAR_DECL(expected_neg,int,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
179 					    0xff, 0xff, 0xff, 0xff,
180 					    0xff, 0xff, 0xff, 0xff,
181 					    0xff, 0xff, 0xff, 0xff };
182 VECT_VAR_DECL(expected_neg,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
183 					    0xffff, 0xffff, 0xffff, 0xffff };
184 VECT_VAR_DECL(expected_neg,int,32,4) [] = { 0xffffffff, 0xffffffff,
185 					    0xffffffff, 0xffffffff };
186 VECT_VAR_DECL(expected_neg,int,64,2) [] = { 0xffffffffffffffff,
187 					    0xffffffffffffffff };
188 VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1,
189 					     0x1, 0x1, 0x1, 0x1,
190 					     0x1, 0x1, 0x1, 0x1,
191 					     0x1, 0x1, 0x1, 0x1 };
192 VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x1f, 0x1f, 0x1f, 0x1f,
193 					     0x1f, 0x1f, 0x1f, 0x1f };
194 VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x7ffff, 0x7ffff,
195 					     0x7ffff, 0x7ffff };
196 VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0xfffffffffff, 0xfffffffffff };
197 
198 /* Expected values of cumulative_saturation flag with negative
199    input and large shift amount.  */
200 int VECT_VAR(expected_cumulative_sat_neg_large,int,8,8) = 1;
201 int VECT_VAR(expected_cumulative_sat_neg_large,int,16,4) = 1;
202 int VECT_VAR(expected_cumulative_sat_neg_large,int,32,2) = 1;
203 int VECT_VAR(expected_cumulative_sat_neg_large,int,64,1) = 1;
204 int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,8) = 1;
205 int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,4) = 1;
206 int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,2) = 1;
207 int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,1) = 1;
208 int VECT_VAR(expected_cumulative_sat_neg_large,int,8,16) = 1;
209 int VECT_VAR(expected_cumulative_sat_neg_large,int,16,8) = 1;
210 int VECT_VAR(expected_cumulative_sat_neg_large,int,32,4) = 1;
211 int VECT_VAR(expected_cumulative_sat_neg_large,int,64,2) = 1;
212 int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,16) = 1;
213 int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,8) = 1;
214 int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,4) = 1;
215 int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,2) = 1;
216 
217 /* Expected results with negative input and large shift amount.  */
218 VECT_VAR_DECL(expected_neg_large,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80,
219 						 0x80, 0x80, 0x80, 0x80 };
220 VECT_VAR_DECL(expected_neg_large,int,16,4) [] = { 0x8000, 0x8000,
221 						  0x8000, 0x8000 };
222 VECT_VAR_DECL(expected_neg_large,int,32,2) [] = { 0x80000000, 0x80000000 };
223 VECT_VAR_DECL(expected_neg_large,int,64,1) [] = { 0x8000000000000000 };
224 VECT_VAR_DECL(expected_neg_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
225 						  0xff, 0xff, 0xff, 0xff };
226 VECT_VAR_DECL(expected_neg_large,uint,16,4) [] = { 0xffff, 0xffff,
227 						   0xffff, 0xffff };
228 VECT_VAR_DECL(expected_neg_large,uint,32,2) [] = { 0xffffffff, 0xffffffff };
229 VECT_VAR_DECL(expected_neg_large,uint,64,1) [] = { 0xffffffffffffffff };
230 VECT_VAR_DECL(expected_neg_large,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
231 						  0x80, 0x80, 0x80, 0x80,
232 						  0x80, 0x80, 0x80, 0x80,
233 						  0x80, 0x80, 0x80, 0x80 };
234 VECT_VAR_DECL(expected_neg_large,int,16,8) [] = { 0x8000, 0x8000,
235 						  0x8000, 0x8000,
236 						  0x8000, 0x8000,
237 						  0x8000, 0x8000 };
238 VECT_VAR_DECL(expected_neg_large,int,32,4) [] = { 0x80000000, 0x80000000,
239 						  0x80000000, 0x80000000 };
240 VECT_VAR_DECL(expected_neg_large,int,64,2) [] = { 0x8000000000000000,
241 						  0x8000000000000000 };
242 VECT_VAR_DECL(expected_neg_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
243 						   0xff, 0xff, 0xff, 0xff,
244 						   0xff, 0xff, 0xff, 0xff,
245 						   0xff, 0xff, 0xff, 0xff };
246 VECT_VAR_DECL(expected_neg_large,uint,16,8) [] = { 0xffff, 0xffff,
247 						   0xffff, 0xffff,
248 						   0xffff, 0xffff,
249 						   0xffff, 0xffff };
250 VECT_VAR_DECL(expected_neg_large,uint,32,4) [] = { 0xffffffff, 0xffffffff,
251 						   0xffffffff, 0xffffffff };
252 VECT_VAR_DECL(expected_neg_large,uint,64,2) [] = { 0xffffffffffffffff,
253 						   0xffffffffffffffff };
254 
255 /* Expected values of cumulative_saturation flag with max input
256    and shift by -1.  */
257 int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,8) = 0;
258 int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,4) = 0;
259 int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,2) = 0;
260 int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,1) = 0;
261 int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,8) = 0;
262 int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,4) = 0;
263 int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,2) = 0;
264 int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,1) = 0;
265 int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,16) = 0;
266 int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,8) = 0;
267 int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,4) = 0;
268 int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,2) = 0;
269 int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,16) = 0;
270 int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,8) = 0;
271 int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,4) = 0;
272 int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,2) = 0;
273 
274 /* Expected results with max input and shift by -1.  */
275 VECT_VAR_DECL(expected_max_minus1,int,8,8) [] = { 0x3f, 0x3f, 0x3f, 0x3f,
276 						  0x3f, 0x3f, 0x3f, 0x3f };
277 VECT_VAR_DECL(expected_max_minus1,int,16,4) [] = { 0x3fff, 0x3fff,
278 						   0x3fff, 0x3fff };
279 VECT_VAR_DECL(expected_max_minus1,int,32,2) [] = { 0x3fffffff, 0x3fffffff };
280 VECT_VAR_DECL(expected_max_minus1,int,64,1) [] = { 0x3fffffffffffffff };
281 VECT_VAR_DECL(expected_max_minus1,uint,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
282 						   0x7f, 0x7f, 0x7f, 0x7f };
283 VECT_VAR_DECL(expected_max_minus1,uint,16,4) [] = { 0x7fff, 0x7fff,
284 						    0x7fff, 0x7fff };
285 VECT_VAR_DECL(expected_max_minus1,uint,32,2) [] = { 0x7fffffff, 0x7fffffff };
286 VECT_VAR_DECL(expected_max_minus1,uint,64,1) [] = { 0x7fffffffffffffff };
287 VECT_VAR_DECL(expected_max_minus1,int,8,16) [] = { 0x3f, 0x3f, 0x3f, 0x3f,
288 						   0x3f, 0x3f, 0x3f, 0x3f,
289 						   0x3f, 0x3f, 0x3f, 0x3f,
290 						   0x3f, 0x3f, 0x3f, 0x3f };
291 VECT_VAR_DECL(expected_max_minus1,int,16,8) [] = { 0x3fff, 0x3fff,
292 						   0x3fff, 0x3fff,
293 						   0x3fff, 0x3fff,
294 						   0x3fff, 0x3fff };
295 VECT_VAR_DECL(expected_max_minus1,int,32,4) [] = { 0x3fffffff, 0x3fffffff,
296 						   0x3fffffff, 0x3fffffff };
297 VECT_VAR_DECL(expected_max_minus1,int,64,2) [] = { 0x3fffffffffffffff,
298 						   0x3fffffffffffffff };
299 VECT_VAR_DECL(expected_max_minus1,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
300 						    0x7f, 0x7f, 0x7f, 0x7f,
301 						    0x7f, 0x7f, 0x7f, 0x7f,
302 						    0x7f, 0x7f, 0x7f, 0x7f };
303 VECT_VAR_DECL(expected_max_minus1,uint,16,8) [] = { 0x7fff, 0x7fff,
304 						    0x7fff, 0x7fff,
305 						    0x7fff, 0x7fff,
306 						    0x7fff, 0x7fff };
307 VECT_VAR_DECL(expected_max_minus1,uint,32,4) [] = { 0x7fffffff, 0x7fffffff,
308 						    0x7fffffff, 0x7fffffff };
309 VECT_VAR_DECL(expected_max_minus1,uint,64,2) [] = { 0x7fffffffffffffff,
310 						    0x7fffffffffffffff };
311 
312 /* Expected values of cumulative_saturation flag with max input
313    and large shift amount.  */
314 int VECT_VAR(expected_cumulative_sat_max_large,int,8,8) = 1;
315 int VECT_VAR(expected_cumulative_sat_max_large,int,16,4) = 1;
316 int VECT_VAR(expected_cumulative_sat_max_large,int,32,2) = 1;
317 int VECT_VAR(expected_cumulative_sat_max_large,int,64,1) = 1;
318 int VECT_VAR(expected_cumulative_sat_max_large,uint,8,8) = 1;
319 int VECT_VAR(expected_cumulative_sat_max_large,uint,16,4) = 1;
320 int VECT_VAR(expected_cumulative_sat_max_large,uint,32,2) = 1;
321 int VECT_VAR(expected_cumulative_sat_max_large,uint,64,1) = 1;
322 int VECT_VAR(expected_cumulative_sat_max_large,int,8,16) = 1;
323 int VECT_VAR(expected_cumulative_sat_max_large,int,16,8) = 1;
324 int VECT_VAR(expected_cumulative_sat_max_large,int,32,4) = 1;
325 int VECT_VAR(expected_cumulative_sat_max_large,int,64,2) = 1;
326 int VECT_VAR(expected_cumulative_sat_max_large,uint,8,16) = 1;
327 int VECT_VAR(expected_cumulative_sat_max_large,uint,16,8) = 1;
328 int VECT_VAR(expected_cumulative_sat_max_large,uint,32,4) = 1;
329 int VECT_VAR(expected_cumulative_sat_max_large,uint,64,2) = 1;
330 
331 /* Expected results with max input and large shift amount.  */
332 VECT_VAR_DECL(expected_max_large,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
333 					       0x7f, 0x7f, 0x7f, 0x7f };
334 VECT_VAR_DECL(expected_max_large,int,16,4) [] = { 0x7fff, 0x7fff,
335 						0x7fff, 0x7fff };
336 VECT_VAR_DECL(expected_max_large,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
337 VECT_VAR_DECL(expected_max_large,int,64,1) [] = { 0x7fffffffffffffff };
338 VECT_VAR_DECL(expected_max_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
339 						0xff, 0xff, 0xff, 0xff };
340 VECT_VAR_DECL(expected_max_large,uint,16,4) [] = { 0xffff, 0xffff,
341 						 0xffff, 0xffff };
342 VECT_VAR_DECL(expected_max_large,uint,32,2) [] = { 0xffffffff, 0xffffffff };
343 VECT_VAR_DECL(expected_max_large,uint,64,1) [] = { 0xffffffffffffffff };
344 VECT_VAR_DECL(expected_max_large,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
345 						0x7f, 0x7f, 0x7f, 0x7f,
346 						0x7f, 0x7f, 0x7f, 0x7f,
347 						0x7f, 0x7f, 0x7f, 0x7f };
348 VECT_VAR_DECL(expected_max_large,int,16,8) [] = { 0x7fff, 0x7fff,
349 						0x7fff, 0x7fff,
350 						0x7fff, 0x7fff,
351 						0x7fff, 0x7fff };
352 VECT_VAR_DECL(expected_max_large,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
353 						0x7fffffff, 0x7fffffff };
354 VECT_VAR_DECL(expected_max_large,int,64,2) [] = { 0x7fffffffffffffff,
355 						0x7fffffffffffffff };
356 VECT_VAR_DECL(expected_max_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
357 						 0xff, 0xff, 0xff, 0xff,
358 						 0xff, 0xff, 0xff, 0xff,
359 						 0xff, 0xff, 0xff, 0xff };
360 VECT_VAR_DECL(expected_max_large,uint,16,8) [] = { 0xffff, 0xffff,
361 						 0xffff, 0xffff,
362 						 0xffff, 0xffff,
363 						 0xffff, 0xffff };
364 VECT_VAR_DECL(expected_max_large,uint,32,4) [] = { 0xffffffff, 0xffffffff,
365 						 0xffffffff, 0xffffffff };
366 VECT_VAR_DECL(expected_max_large,uint,64,2) [] = { 0xffffffffffffffff,
367 						 0xffffffffffffffff };
368 
369 /* Expected values of cumulative_saturation flag with saturation
370    on 64-bits values.  */
371 int VECT_VAR(expected_cumulative_sat_64,int,64,1) = 1;
372 int VECT_VAR(expected_cumulative_sat_64,int,64,2) = 1;
373 
374 /* Expected results with saturation on 64-bits values.  */
375 VECT_VAR_DECL(expected_64,int,64,1) [] = { 0x8000000000000000 };
376 VECT_VAR_DECL(expected_64,int,64,2) [] = { 0x7fffffffffffffff,
377 					   0x7fffffffffffffff };
378 
/* Base name of the intrinsic under test.  The TEST_VQSHL* macros paste
   the Q suffix, an underscore and the element type/width onto it to
   form the intrinsic that is actually called.  */
379 #define INSN vqshl
/* Label handed to the CHECK and CHECK_CUMULATIVE_SAT macros.  */
380 #define TEST_MSG "VQSHL/VQSHLQ"
381 
/* FNNAME1 pastes NAME onto "exec_" to build the prototype
   "void exec_<NAME> (void)".  FNNAME adds one level of indirection so
   that a macro argument such as INSN is expanded before the ##
   operator sees it; FNNAME (INSN) therefore names exec_vqshl rather
   than exec_INSN.  */
382 #define FNNAME1(NAME) void exec_ ## NAME (void)
383 #define FNNAME(NAME) FNNAME1(NAME)
384 
FNNAME(INSN)385 FNNAME (INSN)
386 {
387   /* Basic test: v3=vqshl(v1,v2), then store the result.  */
388 #define TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
389   Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N));		\
390   VECT_VAR(vector_res, T1, W, N) =					\
391     INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N),			\
392 		      VECT_VAR(vector_shift, T3, W, N));		\
393   vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
394 		    VECT_VAR(vector_res, T1, W, N));			\
395   CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
396 
397   /* Two auxiliary macros are necessary to expand INSN.  */
398 #define TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
399   TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
400 
401 #define TEST_VQSHL(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)	\
402   TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
403 
404 
405   DECL_VARIABLE_ALL_VARIANTS(vector);
406   DECL_VARIABLE_ALL_VARIANTS(vector_res);
407 
408   DECL_VARIABLE_SIGNED_VARIANTS(vector_shift);
409 
410   clean_results ();
411 
412   /* Fill input vector with 0, to check saturation on limits.  */
413   VDUP(vector, , int, s, 8, 8, 0);
414   VDUP(vector, , int, s, 16, 4, 0);
415   VDUP(vector, , int, s, 32, 2, 0);
416   VDUP(vector, , int, s, 64, 1, 0);
417   VDUP(vector, , uint, u, 8, 8, 0);
418   VDUP(vector, , uint, u, 16, 4, 0);
419   VDUP(vector, , uint, u, 32, 2, 0);
420   VDUP(vector, , uint, u, 64, 1, 0);
421   VDUP(vector, q, int, s, 8, 16, 0);
422   VDUP(vector, q, int, s, 16, 8, 0);
423   VDUP(vector, q, int, s, 32, 4, 0);
424   VDUP(vector, q, int, s, 64, 2, 0);
425   VDUP(vector, q, uint, u, 8, 16, 0);
426   VDUP(vector, q, uint, u, 16, 8, 0);
427   VDUP(vector, q, uint, u, 32, 4, 0);
428   VDUP(vector, q, uint, u, 64, 2, 0);
429 
430   /* Choose init value arbitrarily, will be used as shift amount */
431   /* Use values equal or one-less-than the type width to check
432      behavior on limits.  */
433 
434   /* 64-bits vectors first.  */
435   /* Shift 8-bits lanes by 7...  */
436   VDUP(vector_shift, , int, s, 8, 8, 7);
437   /* ... except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9).  */
438   VSET_LANE(vector_shift, , int, s, 8, 8, 0, 6);
439   VSET_LANE(vector_shift, , int, s, 8, 8, 1, 8);
440   VSET_LANE(vector_shift, , int, s, 8, 8, 2, 9);
441 
442   /* Shift 16-bits lanes by 15... */
443   VDUP(vector_shift, , int, s, 16, 4, 15);
444   /* ... except: lane 0 (by 14), lane 1 (by 16), and lane 2 (by 17).  */
445   VSET_LANE(vector_shift, , int, s, 16, 4, 0, 14);
446   VSET_LANE(vector_shift, , int, s, 16, 4, 1, 16);
447   VSET_LANE(vector_shift, , int, s, 16, 4, 2, 17);
448 
449   /* Shift 32-bits lanes by 31... */
450   VDUP(vector_shift, , int, s, 32, 2, 31);
451   /* ... except lane 1 (by 30).  */
452   VSET_LANE(vector_shift, , int, s, 32, 2, 1, 30);
453 
454   /* Shift 64 bits lane by 63.  */
455   VDUP(vector_shift, , int, s, 64, 1, 63);
456 
457   /* 128-bits vectors.  */
458   /* Shift 8-bits lanes by 8.  */
459   VDUP(vector_shift, q, int, s, 8, 16, 8);
460   /* Shift 16-bits lanes by 16.  */
461   VDUP(vector_shift, q, int, s, 16, 8, 16);
462   /* Shift 32-bits lanes by 32...  */
463   VDUP(vector_shift, q, int, s, 32, 4, 32);
464   /* ... except lane 1 (by 33).  */
465   VSET_LANE(vector_shift, q, int, s, 32, 4, 1, 33);
466 
467   /* Shift 64-bits lanes by 64... */
468   VDUP(vector_shift, q, int, s, 64, 2, 64);
469   /* ... except lane 1 (by 62).  */
470   VSET_LANE(vector_shift, q, int, s, 64, 2, 1, 62);
471 
472 #define CMT " (with input = 0)"
473   TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0, CMT);
474   TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0, CMT);
475   TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0, CMT);
476   TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0, CMT);
477   TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0, CMT);
478   TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0, CMT);
479   TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0, CMT);
480   TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0, CMT);
481   TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0, CMT);
482   TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0, CMT);
483   TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0, CMT);
484   TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0, CMT);
485   TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0, CMT);
486   TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0, CMT);
487   TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0, CMT);
488   TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0, CMT);
489 
490   CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT);
491   CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT);
492   CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT);
493   CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT);
494   CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT);
495   CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT);
496   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT);
497   CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT);
498   CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT);
499   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT);
500   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT);
501   CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT);
502   CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT);
503   CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0, CMT);
504   CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT);
505   CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT);
506 
507 
508   /* Use negative shift amounts */
509   VDUP(vector_shift, , int, s, 8, 8, -1);
510   VDUP(vector_shift, , int, s, 16, 4, -2);
511   VDUP(vector_shift, , int, s, 32, 2, -3);
512   VDUP(vector_shift, , int, s, 64, 1, -4);
513   VDUP(vector_shift, q, int, s, 8, 16, -7);
514   VDUP(vector_shift, q, int, s, 16, 8, -11);
515   VDUP(vector_shift, q, int, s, 32, 4, -13);
516   VDUP(vector_shift, q, int, s, 64, 2, -20);
517 
518 #undef CMT
519 #define CMT " (input 0 and negative shift amount)"
520   TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0_neg, CMT);
521   TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0_neg, CMT);
522   TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0_neg, CMT);
523   TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0_neg, CMT);
524   TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0_neg, CMT);
525   TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0_neg, CMT);
526   TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0_neg, CMT);
527   TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0_neg, CMT);
528   TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0_neg, CMT);
529   TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0_neg, CMT);
530   TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0_neg, CMT);
531   TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0_neg, CMT);
532   TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0_neg, CMT);
533   TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0_neg, CMT);
534   TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0_neg, CMT);
535   TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0_neg, CMT);
536 
537   CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_neg, CMT);
538   CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_neg, CMT);
539   CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_neg, CMT);
540   CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_neg, CMT);
541   CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_neg, CMT);
542   CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_neg, CMT);
543   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_neg, CMT);
544   CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_neg, CMT);
545   CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_neg, CMT);
546   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_neg, CMT);
547   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_neg, CMT);
548   CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_neg, CMT);
549   CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_neg, CMT);
550   CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_neg, CMT);
551   CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_neg, CMT);
552   CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_neg, CMT);
553 
554   /* Test again, with predefined input values.  */
555   TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
556 
557   /* Choose init value arbitrarily, will be used as shift amount.  */
558   VDUP(vector_shift, , int, s, 8, 8, 1);
559   VDUP(vector_shift, , int, s, 16, 4, 3);
560   VDUP(vector_shift, , int, s, 32, 2, 8);
561   VDUP(vector_shift, , int, s, 64, 1, -3);
562   VDUP(vector_shift, q, int, s, 8, 16, 10);
563   VDUP(vector_shift, q, int, s, 16, 8, 12);
564   VDUP(vector_shift, q, int, s, 32, 4, 32);
565   VDUP(vector_shift, q, int, s, 64, 2, 63);
566 
567 #undef CMT
568 #define CMT ""
569   TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat, CMT);
570   TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat, CMT);
571   TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat, CMT);
572   TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat, CMT);
573   TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat, CMT);
574   TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat, CMT);
575   TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat, CMT);
576   TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat, CMT);
577   TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat, CMT);
578   TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat, CMT);
579   TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat, CMT);
580   TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat, CMT);
581   TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat, CMT);
582   TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat, CMT);
583   TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat, CMT);
584   TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat, CMT);
585 
586   CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
587   CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
588   CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
589   CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT);
590   CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
591   CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
592   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
593   CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT);
594   CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT);
595   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
596   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
597   CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT);
598   CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
599   CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
600   CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
601   CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT);
602 
603 
604   /* Use negative shift amounts */
605   VDUP(vector_shift, , int, s, 8, 8, -1);
606   VDUP(vector_shift, , int, s, 16, 4, -2);
607   VDUP(vector_shift, , int, s, 32, 2, -3);
608   VDUP(vector_shift, , int, s, 64, 1, -4);
609   VDUP(vector_shift, q, int, s, 8, 16, -7);
610   VDUP(vector_shift, q, int, s, 16, 8, -11);
611   VDUP(vector_shift, q, int, s, 32, 4, -13);
612   VDUP(vector_shift, q, int, s, 64, 2, -20);
613 
614 #undef CMT
615 #define CMT " (negative shift amount)"
616   TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg, CMT);
617   TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg, CMT);
618   TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg, CMT);
619   TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg, CMT);
620   TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg, CMT);
621   TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg, CMT);
622   TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg, CMT);
623   TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg, CMT);
624   TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg, CMT);
625   TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg, CMT);
626   TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg, CMT);
627   TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg, CMT);
628   TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg, CMT);
629   TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg, CMT);
630   TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg, CMT);
631   TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg, CMT);
632 
633   CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg, CMT);
634   CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg, CMT);
635   CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg, CMT);
636   CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg, CMT);
637   CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
638   CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
639   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
640   CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT);
641   CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg, CMT);
642   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg, CMT);
643   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg, CMT);
644   CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg, CMT);
645   CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT);
646   CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT);
647   CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT);
648   CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT);
649 
650 
651   /* Use large shift amounts.  */
652   VDUP(vector_shift, , int, s, 8, 8, 8);
653   VDUP(vector_shift, , int, s, 16, 4, 16);
654   VDUP(vector_shift, , int, s, 32, 2, 32);
655   VDUP(vector_shift, , int, s, 64, 1, 64);
656   VDUP(vector_shift, q, int, s, 8, 16, 8);
657   VDUP(vector_shift, q, int, s, 16, 8, 16);
658   VDUP(vector_shift, q, int, s, 32, 4, 32);
659   VDUP(vector_shift, q, int, s, 64, 2, 64);
660 
661 #undef CMT
662 #define CMT " (large shift amount, negative input)"
663   TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg_large, CMT);
664   TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg_large, CMT);
665   TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg_large, CMT);
666   TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg_large, CMT);
667   TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg_large, CMT);
668   TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg_large, CMT);
669   TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg_large, CMT);
670   TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg_large, CMT);
671   TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg_large, CMT);
672   TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg_large, CMT);
673   TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg_large, CMT);
674   TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg_large, CMT);
675   TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg_large, CMT);
676   TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg_large, CMT);
677   TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg_large, CMT);
678   TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg_large, CMT);
679 
680   CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg_large, CMT);
681   CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg_large, CMT);
682   CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg_large, CMT);
683   CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg_large, CMT);
684   CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg_large, CMT);
685   CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg_large, CMT);
686   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg_large, CMT);
687   CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg_large, CMT);
688   CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg_large, CMT);
689   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg_large, CMT);
690   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg_large, CMT);
691   CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg_large, CMT);
692   CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg_large, CMT);
693   CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg_large, CMT);
694   CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg_large, CMT);
695   CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg_large, CMT);
696 
697 
698   /* Fill input vector with max value, to check saturation on limits */
699   VDUP(vector, , int, s, 8, 8, 0x7F);
700   VDUP(vector, , int, s, 16, 4, 0x7FFF);
701   VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
702   VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
703   VDUP(vector, , uint, u, 8, 8, 0xFF);
704   VDUP(vector, , uint, u, 16, 4, 0xFFFF);
705   VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF);
706   VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL);
707   VDUP(vector, q, int, s, 8, 16, 0x7F);
708   VDUP(vector, q, int, s, 16, 8, 0x7FFF);
709   VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
710   VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
711   VDUP(vector, q, uint, u, 8, 16, 0xFF);
712   VDUP(vector, q, uint, u, 16, 8, 0xFFFF);
713   VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF);
714   VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
715 
716   /* Shift by -1 */
717   VDUP(vector_shift, , int, s, 8, 8, -1);
718   VDUP(vector_shift, , int, s, 16, 4, -1);
719   VDUP(vector_shift, , int, s, 32, 2, -1);
720   VDUP(vector_shift, , int, s, 64, 1, -1);
721   VDUP(vector_shift, q, int, s, 8, 16, -1);
722   VDUP(vector_shift, q, int, s, 16, 8, -1);
723   VDUP(vector_shift, q, int, s, 32, 4, -1);
724   VDUP(vector_shift, q, int, s, 64, 2, -1);
725 
726 #undef CMT
727 #define CMT " (max input, shift by -1)"
728   TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_minus1, CMT);
729   TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_minus1, CMT);
730   TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_minus1, CMT);
731   TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_minus1, CMT);
732   TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_minus1, CMT);
733   TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_minus1, CMT);
734   TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_minus1, CMT);
735   TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_minus1, CMT);
736   TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_minus1, CMT);
737   TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_minus1, CMT);
738   TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_minus1, CMT);
739   TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_max_minus1, CMT);
740   TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_minus1, CMT);
741   TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_minus1, CMT);
742   TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_minus1, CMT);
743   TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_minus1, CMT);
744 
745   CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_minus1, CMT);
746   CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_minus1, CMT);
747   CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_minus1, CMT);
748   CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_minus1, CMT);
749   CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_minus1, CMT);
750   CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_minus1, CMT);
751   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_minus1, CMT);
752   CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_minus1, CMT);
753   CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_minus1, CMT);
754   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_minus1, CMT);
755   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_minus1, CMT);
756   CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_minus1, CMT);
757   CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_minus1, CMT);
758   CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_minus1, CMT);
759   CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_minus1, CMT);
760   CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_minus1, CMT);
761 
762 
763   /* Use large shift amounts */
764   VDUP(vector_shift, , int, s, 8, 8, 8);
765   VDUP(vector_shift, , int, s, 16, 4, 16);
766   VDUP(vector_shift, , int, s, 32, 2, 32);
767   VDUP(vector_shift, , int, s, 64, 1, 64);
768   VDUP(vector_shift, q, int, s, 8, 16, 8);
769   VDUP(vector_shift, q, int, s, 16, 8, 16);
770   VDUP(vector_shift, q, int, s, 32, 4, 32);
771   VDUP(vector_shift, q, int, s, 64, 2, 64);
772 
773 #undef CMT
774 #define CMT " (max input, large shift amount)"
775   TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_large, CMT);
776   TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_large, CMT);
777   TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_large, CMT);
778   TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_large, CMT);
779   TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_large, CMT);
780   TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_large, CMT);
781   TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_large, CMT);
782   TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_large, CMT);
783   TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_large, CMT);
784   TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_large, CMT);
785   TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_large, CMT);
786   TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_max_large, CMT);
787   TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_large, CMT);
788   TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_large, CMT);
789   TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_large, CMT);
790   TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_large, CMT);
791 
792   CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_large, CMT);
793   CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_large, CMT);
794   CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_large, CMT);
795   CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_large, CMT);
796   CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_large, CMT);
797   CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_large, CMT);
798   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_large, CMT);
799   CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_large, CMT);
800   CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_large, CMT);
801   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_large, CMT);
802   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_large, CMT);
803   CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_large, CMT);
804   CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_large, CMT);
805   CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_large, CMT);
806   CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_large, CMT);
807   CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_large, CMT);
808 
809 
810   /* Check 64 bits saturation.  */
811   VDUP(vector, , int, s, 64, 1, -10);
812   VDUP(vector_shift, , int, s, 64, 1, 64);
813   VDUP(vector, q, int, s, 64, 2, 10);
814   VDUP(vector_shift, q, int, s, 64, 2, 64);
815 
816 #undef CMT
817 #define CMT " (check saturation on 64 bits)"
818   TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_64, CMT);
819   TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_64, CMT);
820 
821   CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64, CMT);
822   CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64, CMT);
823 }
824 
/* Program entry point: run the vqshl test driver once, then report
   success to the caller.  The exit status is 0 whenever exec_vqshl
   returns; NOTE(review): failures are presumably reported by the
   CHECK/TEST_VQSHL macros themselves -- confirm in arm-neon-ref.h.  */
int
main (void)
{
  exec_vqshl ();

  return 0;
}
830