1 #include <arm_neon.h>
2 #include "arm-neon-ref.h"
3 #include "compute-ref-data.h"
4 
5 #if defined(__aarch64__) && defined(__ARM_FEATURE_FMA)
6 
7 #define A0 123.4f
8 #define A1 -3.8f
9 #define A2 -29.4f
10 #define A3 (__builtin_inff ())
11 #define A4 0.0f
12 #define A5 24.0f
13 #define A6 124.0f
14 #define A7 1024.0f
15 
16 #define B0 -5.8f
17 #define B1 -0.0f
18 #define B2 -10.8f
19 #define B3 10.0f
20 #define B4 23.4f
21 #define B5 -1234.8f
22 #define B6 8.9f
23 #define B7 4.0f
24 
25 #define E0 9.8f
26 #define E1 -1024.0f
27 #define E2 (-__builtin_inff ())
28 #define E3 479.0f
29 float32_t elem0 = E0;
30 float32_t elem1 = E1;
31 float32_t elem2 = E2;
32 float32_t elem3 = E3;
33 
34 #define DA0 1231234.4
35 #define DA1 -3.8
36 #define DA2 -2980.4
37 #define DA3 -5.8
38 #define DA4 0.01123
39 #define DA5 24.0
40 #define DA6 124.12345
41 #define DA7 1024.0
42 
43 #define DB0 -5.8
44 #define DB1 (__builtin_inf ())
45 #define DB2 -105.8
46 #define DB3 10.0
47 #define DB4 (-__builtin_inf ())
48 #define DB5 -1234.8
49 #define DB6 848.9
50 #define DB7 44444.0
51 
52 #define DE0 9.8
53 #define DE1 -1024.0
54 #define DE2 105.8
55 #define DE3 479.0
56 float64_t delem0 = DE0;
57 float64_t delem1 = DE1;
58 float64_t delem2 = DE2;
59 float64_t delem3 = DE3;
60 
61 /* Expected results for vfms_n.  */
62 
63 VECT_VAR_DECL(expectedfms0, float, 32, 2) [] = {A0 + -B0 * E0, A1 + -B1 * E0};
64 VECT_VAR_DECL(expectedfms1, float, 32, 2) [] = {A2 + -B2 * E1, A3 + -B3 * E1};
65 VECT_VAR_DECL(expectedfms2, float, 32, 2) [] = {A4 + -B4 * E2, A5 + -B5 * E2};
66 VECT_VAR_DECL(expectedfms3, float, 32, 2) [] = {A6 + -B6 * E3, A7 + -B7 * E3};
67 VECT_VAR_DECL(expectedfma0, float, 32, 2) [] = {A0 + B0 * E0, A1 + B1 * E0};
68 VECT_VAR_DECL(expectedfma1, float, 32, 2) [] = {A2 + B2 * E1, A3 + B3 * E1};
69 VECT_VAR_DECL(expectedfma2, float, 32, 2) [] = {A4 + B4 * E2, A5 + B5 * E2};
70 VECT_VAR_DECL(expectedfma3, float, 32, 2) [] = {A6 + B6 * E3, A7 + B7 * E3};
71 
72 hfloat32_t * VECT_VAR (expectedfms0_static, hfloat, 32, 2) =
73   (hfloat32_t *) VECT_VAR (expectedfms0, float, 32, 2);
74 hfloat32_t * VECT_VAR (expectedfms1_static, hfloat, 32, 2) =
75   (hfloat32_t *) VECT_VAR (expectedfms1, float, 32, 2);
76 hfloat32_t * VECT_VAR (expectedfms2_static, hfloat, 32, 2) =
77   (hfloat32_t *) VECT_VAR (expectedfms2, float, 32, 2);
78 hfloat32_t * VECT_VAR (expectedfms3_static, hfloat, 32, 2) =
79   (hfloat32_t *) VECT_VAR (expectedfms3, float, 32, 2);
80 hfloat32_t * VECT_VAR (expectedfma0_static, hfloat, 32, 2) =
81   (hfloat32_t *) VECT_VAR (expectedfma0, float, 32, 2);
82 hfloat32_t * VECT_VAR (expectedfma1_static, hfloat, 32, 2) =
83   (hfloat32_t *) VECT_VAR (expectedfma1, float, 32, 2);
84 hfloat32_t * VECT_VAR (expectedfma2_static, hfloat, 32, 2) =
85   (hfloat32_t *) VECT_VAR (expectedfma2, float, 32, 2);
86 hfloat32_t * VECT_VAR (expectedfma3_static, hfloat, 32, 2) =
87   (hfloat32_t *) VECT_VAR (expectedfma3, float, 32, 2);
88 
89 
90 VECT_VAR_DECL(expectedfms0, float, 32, 4) [] = {A0 + -B0 * E0, A1 + -B1 * E0,
91 						A2 + -B2 * E0, A3 + -B3 * E0};
92 VECT_VAR_DECL(expectedfms1, float, 32, 4) [] = {A4 + -B4 * E1, A5 + -B5 * E1,
93 						A6 + -B6 * E1, A7 + -B7 * E1};
94 VECT_VAR_DECL(expectedfms2, float, 32, 4) [] = {A0 + -B0 * E2, A2 + -B2 * E2,
95 						A4 + -B4 * E2, A6 + -B6 * E2};
96 VECT_VAR_DECL(expectedfms3, float, 32, 4) [] = {A1 + -B1 * E3, A3 + -B3 * E3,
97 						A5 + -B5 * E3, A7 + -B7 * E3};
98 VECT_VAR_DECL(expectedfma0, float, 32, 4) [] = {A0 + B0 * E0, A1 + B1 * E0,
99 						A2 + B2 * E0, A3 + B3 * E0};
100 VECT_VAR_DECL(expectedfma1, float, 32, 4) [] = {A4 + B4 * E1, A5 + B5 * E1,
101 						A6 + B6 * E1, A7 + B7 * E1};
102 VECT_VAR_DECL(expectedfma2, float, 32, 4) [] = {A0 + B0 * E2, A2 + B2 * E2,
103 						A4 + B4 * E2, A6 + B6 * E2};
104 VECT_VAR_DECL(expectedfma3, float, 32, 4) [] = {A1 + B1 * E3, A3 + B3 * E3,
105 						A5 + B5 * E3, A7 + B7 * E3};
106 
107 hfloat32_t * VECT_VAR (expectedfms0_static, hfloat, 32, 4) =
108   (hfloat32_t *) VECT_VAR (expectedfms0, float, 32, 4);
109 hfloat32_t * VECT_VAR (expectedfms1_static, hfloat, 32, 4) =
110   (hfloat32_t *) VECT_VAR (expectedfms1, float, 32, 4);
111 hfloat32_t * VECT_VAR (expectedfms2_static, hfloat, 32, 4) =
112   (hfloat32_t *) VECT_VAR (expectedfms2, float, 32, 4);
113 hfloat32_t * VECT_VAR (expectedfms3_static, hfloat, 32, 4) =
114   (hfloat32_t *) VECT_VAR (expectedfms3, float, 32, 4);
115 hfloat32_t * VECT_VAR (expectedfma0_static, hfloat, 32, 4) =
116   (hfloat32_t *) VECT_VAR (expectedfma0, float, 32, 4);
117 hfloat32_t * VECT_VAR (expectedfma1_static, hfloat, 32, 4) =
118   (hfloat32_t *) VECT_VAR (expectedfma1, float, 32, 4);
119 hfloat32_t * VECT_VAR (expectedfma2_static, hfloat, 32, 4) =
120   (hfloat32_t *) VECT_VAR (expectedfma2, float, 32, 4);
121 hfloat32_t * VECT_VAR (expectedfma3_static, hfloat, 32, 4) =
122   (hfloat32_t *) VECT_VAR (expectedfma3, float, 32, 4);
123 
124 VECT_VAR_DECL(expectedfms0, float, 64, 2) [] = {DA0 + -DB0 * DE0,
125 						DA1 + -DB1 * DE0};
126 VECT_VAR_DECL(expectedfms1, float, 64, 2) [] = {DA2 + -DB2 * DE1,
127 						DA3 + -DB3 * DE1};
128 VECT_VAR_DECL(expectedfms2, float, 64, 2) [] = {DA4 + -DB4 * DE2,
129 						DA5 + -DB5 * DE2};
130 VECT_VAR_DECL(expectedfms3, float, 64, 2) [] = {DA6 + -DB6 * DE3,
131 						DA7 + -DB7 * DE3};
132 VECT_VAR_DECL(expectedfma0, float, 64, 2) [] = {DA0 + DB0 * DE0,
133 						DA1 + DB1 * DE0};
134 VECT_VAR_DECL(expectedfma1, float, 64, 2) [] = {DA2 + DB2 * DE1,
135 						DA3 + DB3 * DE1};
136 VECT_VAR_DECL(expectedfma2, float, 64, 2) [] = {DA4 + DB4 * DE2,
137 						DA5 + DB5 * DE2};
138 VECT_VAR_DECL(expectedfma3, float, 64, 2) [] = {DA6 + DB6 * DE3,
139 						DA7 + DB7 * DE3};
140 hfloat64_t * VECT_VAR (expectedfms0_static, hfloat, 64, 2) =
141   (hfloat64_t *) VECT_VAR (expectedfms0, float, 64, 2);
142 hfloat64_t * VECT_VAR (expectedfms1_static, hfloat, 64, 2) =
143   (hfloat64_t *) VECT_VAR (expectedfms1, float, 64, 2);
144 hfloat64_t * VECT_VAR (expectedfms2_static, hfloat, 64, 2) =
145   (hfloat64_t *) VECT_VAR (expectedfms2, float, 64, 2);
146 hfloat64_t * VECT_VAR (expectedfms3_static, hfloat, 64, 2) =
147   (hfloat64_t *) VECT_VAR (expectedfms3, float, 64, 2);
148 hfloat64_t * VECT_VAR (expectedfma0_static, hfloat, 64, 2) =
149   (hfloat64_t *) VECT_VAR (expectedfma0, float, 64, 2);
150 hfloat64_t * VECT_VAR (expectedfma1_static, hfloat, 64, 2) =
151   (hfloat64_t *) VECT_VAR (expectedfma1, float, 64, 2);
152 hfloat64_t * VECT_VAR (expectedfma2_static, hfloat, 64, 2) =
153   (hfloat64_t *) VECT_VAR (expectedfma2, float, 64, 2);
154 hfloat64_t * VECT_VAR (expectedfma3_static, hfloat, 64, 2) =
155   (hfloat64_t *) VECT_VAR (expectedfma3, float, 64, 2);
156 
157 VECT_VAR_DECL(expectedfms0, float, 64, 1) [] = {DA0 + -DB0 * DE0};
158 VECT_VAR_DECL(expectedfms1, float, 64, 1) [] = {DA2 + -DB2 * DE1};
159 VECT_VAR_DECL(expectedfms2, float, 64, 1) [] = {DA4 + -DB4 * DE2};
160 VECT_VAR_DECL(expectedfms3, float, 64, 1) [] = {DA6 + -DB6 * DE3};
161 VECT_VAR_DECL(expectedfma0, float, 64, 1) [] = {DA0 + DB0 * DE0};
162 VECT_VAR_DECL(expectedfma1, float, 64, 1) [] = {DA2 + DB2 * DE1};
163 VECT_VAR_DECL(expectedfma2, float, 64, 1) [] = {DA4 + DB4 * DE2};
164 VECT_VAR_DECL(expectedfma3, float, 64, 1) [] = {DA6 + DB6 * DE3};
165 
166 hfloat64_t * VECT_VAR (expectedfms0_static, hfloat, 64, 1) =
167   (hfloat64_t *) VECT_VAR (expectedfms0, float, 64, 1);
168 hfloat64_t * VECT_VAR (expectedfms1_static, hfloat, 64, 1) =
169   (hfloat64_t *) VECT_VAR (expectedfms1, float, 64, 1);
170 hfloat64_t * VECT_VAR (expectedfms2_static, hfloat, 64, 1) =
171   (hfloat64_t *) VECT_VAR (expectedfms2, float, 64, 1);
172 hfloat64_t * VECT_VAR (expectedfms3_static, hfloat, 64, 1) =
173   (hfloat64_t *) VECT_VAR (expectedfms3, float, 64, 1);
174 hfloat64_t * VECT_VAR (expectedfma0_static, hfloat, 64, 1) =
175   (hfloat64_t *) VECT_VAR (expectedfma0, float, 64, 1);
176 hfloat64_t * VECT_VAR (expectedfma1_static, hfloat, 64, 1) =
177   (hfloat64_t *) VECT_VAR (expectedfma1, float, 64, 1);
178 hfloat64_t * VECT_VAR (expectedfma2_static, hfloat, 64, 1) =
179   (hfloat64_t *) VECT_VAR (expectedfma2, float, 64, 1);
180 hfloat64_t * VECT_VAR (expectedfma3_static, hfloat, 64, 1) =
181   (hfloat64_t *) VECT_VAR (expectedfma3, float, 64, 1);
182 
exec_vfma_vfms_n(void)183 void exec_vfma_vfms_n (void)
184 {
185 #undef TEST_MSG
186 #define TEST_MSG "VFMS_VFMA_N (FP32)"
187   clean_results ();
188 
189   DECL_VARIABLE(vsrc_1, float, 32, 2);
190   DECL_VARIABLE(vsrc_2, float, 32, 2);
191   VECT_VAR_DECL (buf_src_1, float, 32, 2) [] = {A0, A1};
192   VECT_VAR_DECL (buf_src_2, float, 32, 2) [] = {B0, B1};
193   VLOAD (vsrc_1, buf_src_1, , float, f, 32, 2);
194   VLOAD (vsrc_2, buf_src_2, , float, f, 32, 2);
195   DECL_VARIABLE (vector_res, float, 32, 2) =
196     vfms_n_f32 (VECT_VAR (vsrc_1, float, 32, 2),
197 		VECT_VAR (vsrc_2, float, 32, 2), elem0);
198   vst1_f32 (VECT_VAR (result, float, 32, 2),
199 	    VECT_VAR (vector_res, float, 32, 2));
200   CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfms0_static, "");
201   VECT_VAR (vector_res, float, 32, 2) =
202     vfma_n_f32 (VECT_VAR (vsrc_1, float, 32, 2),
203 		VECT_VAR (vsrc_2, float, 32, 2), elem0);
204   vst1_f32 (VECT_VAR (result, float, 32, 2),
205 	    VECT_VAR (vector_res, float, 32, 2));
206   CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfma0_static, "");
207 
208   VECT_VAR_DECL (buf_src_3, float, 32, 2) [] = {A2, A3};
209   VECT_VAR_DECL (buf_src_4, float, 32, 2) [] = {B2, B3};
210   VLOAD (vsrc_1, buf_src_3, , float, f, 32, 2);
211   VLOAD (vsrc_2, buf_src_4, , float, f, 32, 2);
212   VECT_VAR (vector_res, float, 32, 2) =
213     vfms_n_f32 (VECT_VAR (vsrc_1, float, 32, 2),
214 		VECT_VAR (vsrc_2, float, 32, 2), elem1);
215   vst1_f32 (VECT_VAR (result, float, 32, 2),
216 	    VECT_VAR (vector_res, float, 32, 2));
217   CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfms1_static, "");
218   VECT_VAR (vector_res, float, 32, 2) =
219     vfma_n_f32 (VECT_VAR (vsrc_1, float, 32, 2),
220 		VECT_VAR (vsrc_2, float, 32, 2), elem1);
221   vst1_f32 (VECT_VAR (result, float, 32, 2),
222 	    VECT_VAR (vector_res, float, 32, 2));
223   CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfma1_static, "");
224 
225   VECT_VAR_DECL (buf_src_5, float, 32, 2) [] = {A4, A5};
226   VECT_VAR_DECL (buf_src_6, float, 32, 2) [] = {B4, B5};
227   VLOAD (vsrc_1, buf_src_5, , float, f, 32, 2);
228   VLOAD (vsrc_2, buf_src_6, , float, f, 32, 2);
229   VECT_VAR (vector_res, float, 32, 2) =
230     vfms_n_f32 (VECT_VAR (vsrc_1, float, 32, 2),
231 		VECT_VAR (vsrc_2, float, 32, 2), elem2);
232   vst1_f32 (VECT_VAR (result, float, 32, 2),
233 	    VECT_VAR (vector_res, float, 32, 2));
234   CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfms2_static, "");
235   VECT_VAR (vector_res, float, 32, 2) =
236     vfma_n_f32 (VECT_VAR (vsrc_1, float, 32, 2),
237 		VECT_VAR (vsrc_2, float, 32, 2), elem2);
238   vst1_f32 (VECT_VAR (result, float, 32, 2),
239 	    VECT_VAR (vector_res, float, 32, 2));
240   CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfma2_static, "");
241 
242   VECT_VAR_DECL (buf_src_7, float, 32, 2) [] = {A6, A7};
243   VECT_VAR_DECL (buf_src_8, float, 32, 2) [] = {B6, B7};
244   VLOAD (vsrc_1, buf_src_7, , float, f, 32, 2);
245   VLOAD (vsrc_2, buf_src_8, , float, f, 32, 2);
246   VECT_VAR (vector_res, float, 32, 2) =
247     vfms_n_f32 (VECT_VAR (vsrc_1, float, 32, 2),
248 		VECT_VAR (vsrc_2, float, 32, 2), elem3);
249   vst1_f32 (VECT_VAR (result, float, 32, 2),
250 	    VECT_VAR (vector_res, float, 32, 2));
251   CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfms3_static, "");
252   VECT_VAR (vector_res, float, 32, 2) =
253     vfma_n_f32 (VECT_VAR (vsrc_1, float, 32, 2),
254 		VECT_VAR (vsrc_2, float, 32, 2), elem3);
255   vst1_f32 (VECT_VAR (result, float, 32, 2),
256 	    VECT_VAR (vector_res, float, 32, 2));
257   CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfma3_static, "");
258 
259 #undef TEST_MSG
260 #define TEST_MSG "VFMSQ_VFMAQ_N (FP32)"
261   clean_results ();
262 
263   DECL_VARIABLE(vsrc_1, float, 32, 4);
264   DECL_VARIABLE(vsrc_2, float, 32, 4);
265   VECT_VAR_DECL (buf_src_1, float, 32, 4) [] = {A0, A1, A2, A3};
266   VECT_VAR_DECL (buf_src_2, float, 32, 4) [] = {B0, B1, B2, B3};
267   VLOAD (vsrc_1, buf_src_1, q, float, f, 32, 4);
268   VLOAD (vsrc_2, buf_src_2, q, float, f, 32, 4);
269   DECL_VARIABLE (vector_res, float, 32, 4) =
270     vfmsq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4),
271 		 VECT_VAR (vsrc_2, float, 32, 4), elem0);
272   vst1q_f32 (VECT_VAR (result, float, 32, 4),
273 	     VECT_VAR (vector_res, float, 32, 4));
274   CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfms0_static, "");
275   VECT_VAR (vector_res, float, 32, 4) =
276     vfmaq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4),
277 		 VECT_VAR (vsrc_2, float, 32, 4), elem0);
278   vst1q_f32 (VECT_VAR (result, float, 32, 4),
279 	     VECT_VAR (vector_res, float, 32, 4));
280   CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfma0_static, "");
281 
282   VECT_VAR_DECL (buf_src_3, float, 32, 4) [] = {A4, A5, A6, A7};
283   VECT_VAR_DECL (buf_src_4, float, 32, 4) [] = {B4, B5, B6, B7};
284   VLOAD (vsrc_1, buf_src_3, q, float, f, 32, 4);
285   VLOAD (vsrc_2, buf_src_4, q, float, f, 32, 4);
286   VECT_VAR (vector_res, float, 32, 4) =
287     vfmsq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4),
288 		 VECT_VAR (vsrc_2, float, 32, 4), elem1);
289   vst1q_f32 (VECT_VAR (result, float, 32, 4),
290 	     VECT_VAR (vector_res, float, 32, 4));
291   CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfms1_static, "");
292   VECT_VAR (vector_res, float, 32, 4) =
293     vfmaq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4),
294 		 VECT_VAR (vsrc_2, float, 32, 4), elem1);
295   vst1q_f32 (VECT_VAR (result, float, 32, 4),
296 	     VECT_VAR (vector_res, float, 32, 4));
297   CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfma1_static, "");
298 
299   VECT_VAR_DECL (buf_src_5, float, 32, 4) [] = {A0, A2, A4, A6};
300   VECT_VAR_DECL (buf_src_6, float, 32, 4) [] = {B0, B2, B4, B6};
301   VLOAD (vsrc_1, buf_src_5, q, float, f, 32, 4);
302   VLOAD (vsrc_2, buf_src_6, q, float, f, 32, 4);
303   VECT_VAR (vector_res, float, 32, 4) =
304     vfmsq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4),
305 		 VECT_VAR (vsrc_2, float, 32, 4), elem2);
306   vst1q_f32 (VECT_VAR (result, float, 32, 4),
307 	     VECT_VAR (vector_res, float, 32, 4));
308   CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfms2_static, "");
309   VECT_VAR (vector_res, float, 32, 4) =
310     vfmaq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4),
311 		 VECT_VAR (vsrc_2, float, 32, 4), elem2);
312   vst1q_f32 (VECT_VAR (result, float, 32, 4),
313 	     VECT_VAR (vector_res, float, 32, 4));
314   CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfma2_static, "");
315 
316   VECT_VAR_DECL (buf_src_7, float, 32, 4) [] = {A1, A3, A5, A7};
317   VECT_VAR_DECL (buf_src_8, float, 32, 4) [] = {B1, B3, B5, B7};
318   VLOAD (vsrc_1, buf_src_7, q, float, f, 32, 4);
319   VLOAD (vsrc_2, buf_src_8, q, float, f, 32, 4);
320   VECT_VAR (vector_res, float, 32, 4) =
321     vfmsq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4),
322 		 VECT_VAR (vsrc_2, float, 32, 4), elem3);
323   vst1q_f32 (VECT_VAR (result, float, 32, 4),
324 	     VECT_VAR (vector_res, float, 32, 4));
325   CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfms3_static, "");
326   VECT_VAR (vector_res, float, 32, 4) =
327     vfmaq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4),
328 		 VECT_VAR (vsrc_2, float, 32, 4), elem3);
329   vst1q_f32 (VECT_VAR (result, float, 32, 4),
330 	     VECT_VAR (vector_res, float, 32, 4));
331   CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfma3_static, "");
332 
333 #undef TEST_MSG
334 #define TEST_MSG "VFMSQ_VFMAQ_N (FP64)"
335   clean_results ();
336 
337   DECL_VARIABLE(vsrc_1, float, 64, 2);
338   DECL_VARIABLE(vsrc_2, float, 64, 2);
339   VECT_VAR_DECL (buf_src_1, float, 64, 2) [] = {DA0, DA1};
340   VECT_VAR_DECL (buf_src_2, float, 64, 2) [] = {DB0, DB1};
341   VLOAD (vsrc_1, buf_src_1, q, float, f, 64, 2);
342   VLOAD (vsrc_2, buf_src_2, q, float, f, 64, 2);
343   DECL_VARIABLE (vector_res, float, 64, 2) =
344     vfmsq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2),
345 		 VECT_VAR (vsrc_2, float, 64, 2), delem0);
346   vst1q_f64 (VECT_VAR (result, float, 64, 2),
347 	     VECT_VAR (vector_res, float, 64, 2));
348   CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfms0_static, "");
349   VECT_VAR (vector_res, float, 64, 2) =
350     vfmaq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2),
351 		 VECT_VAR (vsrc_2, float, 64, 2), delem0);
352   vst1q_f64 (VECT_VAR (result, float, 64, 2),
353 	     VECT_VAR (vector_res, float, 64, 2));
354   CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfma0_static, "");
355 
356   VECT_VAR_DECL (buf_src_3, float, 64, 2) [] = {DA2, DA3};
357   VECT_VAR_DECL (buf_src_4, float, 64, 2) [] = {DB2, DB3};
358   VLOAD (vsrc_1, buf_src_3, q, float, f, 64, 2);
359   VLOAD (vsrc_2, buf_src_4, q, float, f, 64, 2);
360   VECT_VAR (vector_res, float, 64, 2) =
361     vfmsq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2),
362 		 VECT_VAR (vsrc_2, float, 64, 2), delem1);
363   vst1q_f64 (VECT_VAR (result, float, 64, 2),
364 	     VECT_VAR (vector_res, float, 64, 2));
365   CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfms1_static, "");
366   VECT_VAR (vector_res, float, 64, 2) =
367     vfmaq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2),
368 		 VECT_VAR (vsrc_2, float, 64, 2), delem1);
369   vst1q_f64 (VECT_VAR (result, float, 64, 2),
370 	     VECT_VAR (vector_res, float, 64, 2));
371   CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfma1_static, "");
372 
373   VECT_VAR_DECL (buf_src_5, float, 64, 2) [] = {DA4, DA5};
374   VECT_VAR_DECL (buf_src_6, float, 64, 2) [] = {DB4, DB5};
375   VLOAD (vsrc_1, buf_src_5, q, float, f, 64, 2);
376   VLOAD (vsrc_2, buf_src_6, q, float, f, 64, 2);
377   VECT_VAR (vector_res, float, 64, 2) =
378     vfmsq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2),
379 		 VECT_VAR (vsrc_2, float, 64, 2), delem2);
380   vst1q_f64 (VECT_VAR (result, float, 64, 2),
381 	     VECT_VAR (vector_res, float, 64, 2));
382   CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfms2_static, "");
383   VECT_VAR (vector_res, float, 64, 2) =
384     vfmaq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2),
385 		 VECT_VAR (vsrc_2, float, 64, 2), delem2);
386   vst1q_f64 (VECT_VAR (result, float, 64, 2),
387 	     VECT_VAR (vector_res, float, 64, 2));
388   CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfma2_static, "");
389 
390   VECT_VAR_DECL (buf_src_7, float, 64, 2) [] = {DA6, DA7};
391   VECT_VAR_DECL (buf_src_8, float, 64, 2) [] = {DB6, DB7};
392   VLOAD (vsrc_1, buf_src_7, q, float, f, 64, 2);
393   VLOAD (vsrc_2, buf_src_8, q, float, f, 64, 2);
394   VECT_VAR (vector_res, float, 64, 2) =
395     vfmsq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2),
396 		 VECT_VAR (vsrc_2, float, 64, 2), delem3);
397   vst1q_f64 (VECT_VAR (result, float, 64, 2),
398 	     VECT_VAR (vector_res, float, 64, 2));
399   CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfms3_static, "");
400   VECT_VAR (vector_res, float, 64, 2) =
401     vfmaq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2),
402 		 VECT_VAR (vsrc_2, float, 64, 2), delem3);
403   vst1q_f64 (VECT_VAR (result, float, 64, 2),
404 	     VECT_VAR (vector_res, float, 64, 2));
405   CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfma3_static, "");
406 
407 #undef TEST_MSG
408 #define TEST_MSG "VFMS_VFMA_N (FP64)"
409   clean_results ();
410 
411   DECL_VARIABLE(vsrc_1, float, 64, 1);
412   DECL_VARIABLE(vsrc_2, float, 64, 1);
413   VECT_VAR_DECL (buf_src_1, float, 64, 1) [] = {DA0};
414   VECT_VAR_DECL (buf_src_2, float, 64, 1) [] = {DB0};
415   VLOAD (vsrc_1, buf_src_1, , float, f, 64, 1);
416   VLOAD (vsrc_2, buf_src_2, , float, f, 64, 1);
417   DECL_VARIABLE (vector_res, float, 64, 1) =
418     vfms_n_f64 (VECT_VAR (vsrc_1, float, 64, 1),
419 		VECT_VAR (vsrc_2, float, 64, 1), delem0);
420   vst1_f64 (VECT_VAR (result, float, 64, 1),
421 	     VECT_VAR (vector_res, float, 64, 1));
422   CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfms0_static, "");
423   VECT_VAR (vector_res, float, 64, 1) =
424     vfma_n_f64 (VECT_VAR (vsrc_1, float, 64, 1),
425 		VECT_VAR (vsrc_2, float, 64, 1), delem0);
426   vst1_f64 (VECT_VAR (result, float, 64, 1),
427 	     VECT_VAR (vector_res, float, 64, 1));
428   CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfma0_static, "");
429 
430   VECT_VAR_DECL (buf_src_3, float, 64, 1) [] = {DA2};
431   VECT_VAR_DECL (buf_src_4, float, 64, 1) [] = {DB2};
432   VLOAD (vsrc_1, buf_src_3, , float, f, 64, 1);
433   VLOAD (vsrc_2, buf_src_4, , float, f, 64, 1);
434   VECT_VAR (vector_res, float, 64, 1) =
435     vfms_n_f64 (VECT_VAR (vsrc_1, float, 64, 1),
436 		VECT_VAR (vsrc_2, float, 64, 1), delem1);
437   vst1_f64 (VECT_VAR (result, float, 64, 1),
438 	     VECT_VAR (vector_res, float, 64, 1));
439   CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfms1_static, "");
440   VECT_VAR (vector_res, float, 64, 1) =
441     vfma_n_f64 (VECT_VAR (vsrc_1, float, 64, 1),
442 		VECT_VAR (vsrc_2, float, 64, 1), delem1);
443   vst1_f64 (VECT_VAR (result, float, 64, 1),
444 	     VECT_VAR (vector_res, float, 64, 1));
445   CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfma1_static, "");
446 
447   VECT_VAR_DECL (buf_src_5, float, 64, 1) [] = {DA4};
448   VECT_VAR_DECL (buf_src_6, float, 64, 1) [] = {DB4};
449   VLOAD (vsrc_1, buf_src_5, , float, f, 64, 1);
450   VLOAD (vsrc_2, buf_src_6, , float, f, 64, 1);
451   VECT_VAR (vector_res, float, 64, 1) =
452     vfms_n_f64 (VECT_VAR (vsrc_1, float, 64, 1),
453 		VECT_VAR (vsrc_2, float, 64, 1), delem2);
454   vst1_f64 (VECT_VAR (result, float, 64, 1),
455 	     VECT_VAR (vector_res, float, 64, 1));
456   CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfms2_static, "");
457   VECT_VAR (vector_res, float, 64, 1) =
458     vfma_n_f64 (VECT_VAR (vsrc_1, float, 64, 1),
459 		VECT_VAR (vsrc_2, float, 64, 1), delem2);
460   vst1_f64 (VECT_VAR (result, float, 64, 1),
461 	     VECT_VAR (vector_res, float, 64, 1));
462   CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfma2_static, "");
463 
464   VECT_VAR_DECL (buf_src_7, float, 64, 1) [] = {DA6};
465   VECT_VAR_DECL (buf_src_8, float, 64, 1) [] = {DB6};
466   VLOAD (vsrc_1, buf_src_7, , float, f, 64, 1);
467   VLOAD (vsrc_2, buf_src_8, , float, f, 64, 1);
468   VECT_VAR (vector_res, float, 64, 1) =
469     vfms_n_f64 (VECT_VAR (vsrc_1, float, 64, 1),
470 		VECT_VAR (vsrc_2, float, 64, 1), delem3);
471   vst1_f64 (VECT_VAR (result, float, 64, 1),
472 	     VECT_VAR (vector_res, float, 64, 1));
473   CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfms3_static, "");
474   VECT_VAR (vector_res, float, 64, 1) =
475     vfma_n_f64 (VECT_VAR (vsrc_1, float, 64, 1),
476 		VECT_VAR (vsrc_2, float, 64, 1), delem3);
477   vst1_f64 (VECT_VAR (result, float, 64, 1),
478 	     VECT_VAR (vector_res, float, 64, 1));
479   CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfma3_static, "");
480 }
481 #endif
482 
/* Test entry point.  Runs exec_vfma_vfms_n when compiling for AArch64 with
   __ARM_FEATURE_FMA defined; on any other configuration the call is
   compiled out.  Returns 0 in both cases.  */
int
main (void)
{
#if defined (__aarch64__) && defined (__ARM_FEATURE_FMA)
  exec_vfma_vfms_n ();
#endif

  return 0;
}
491