/* { dg-do run } */
/* { dg-options "-O3 --save-temps" } */

4 #include <arm_neon.h>
5 
6 #define DELTA 0.0001
7 
8 extern double fabs (double);
9 
10 extern void abort (void);
11 
/* Define the static function
     test_vfms<q1>_lane<q2>_f<size> (res, in1, in2)
   which wraps the vfms<q1>_lane<q2>_f<size> intrinsic.

   q1         empty or q; pasted into the vld1/vst1/vfms names used for
              RES and IN1.
   q2         empty or q; pasted into the vld1 name used for IN2 and into
              the intrinsic name.
   size       element width in bits (instantiated below with 32 and 64).
   in1_lanes  lane count of the vector type that holds RES and IN1.
   in2_lanes  lane count of the vector type that holds IN2.

   The generated function loads A from RES and B from IN1, applies the
   intrinsic to (A, B, C, lane) and stores the result back to RES.  The
   selected lane always holds in2[1]: when C has more than one lane it is
   loaded from IN2 and lane 1 is used; otherwise C is loaded from IN2 + 1
   and lane 0 is used.  The lane argument is a literal in both branches.  */
#define TEST_VMLS(q1, q2, size, in1_lanes, in2_lanes)                   \
static void                                                             \
test_vfms##q1##_lane##q2##_f##size (float##size##_t * res,              \
                                   const float##size##_t *in1,          \
                                   const float##size##_t *in2)          \
{                                                                       \
  float##size##x##in1_lanes##_t a = vld1##q1##_f##size (res);           \
  float##size##x##in1_lanes##_t b = vld1##q1##_f##size (in1);           \
  float##size##x##in2_lanes##_t c;                                      \
  if (in2_lanes > 1)                                                    \
    {                                                                   \
      c = vld1##q2##_f##size (in2);                                     \
      a = vfms##q1##_lane##q2##_f##size (a, b, c, 1);                   \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      c = vld1##q2##_f##size (in2 + 1);                                 \
      a = vfms##q1##_lane##q2##_f##size (a, b, c, 0);                   \
    }                                                                   \
  vst1##q1##_f##size (res, a);                                          \
}

/* Instantiate the four TEST_VMLS wrappers (vfms_lane, vfmsq_lane,
   vfms_laneq, vfmsq_laneq) for one element width.

   width         element width in bits.
   n_lanes       lane count passed wherever the q variant is selected.
   n_half_lanes  lane count passed wherever the non-q variant is selected.

   The last line deliberately has no trailing backslash, so the macro body
   does not depend on what follows it in the file.  */
#define BUILD_VARS(width, n_lanes, n_half_lanes)                \
TEST_VMLS ( ,  , width, n_half_lanes, n_half_lanes)             \
TEST_VMLS (q,  , width, n_lanes, n_half_lanes)                  \
TEST_VMLS ( , q, width, n_half_lanes, n_lanes)                  \
TEST_VMLS (q, q, width, n_lanes, n_lanes)

40 BUILD_VARS (32, 4, 2)
41 BUILD_VARS (64, 2, 1)
42 
/* Array initializers selected by BUILD_TEST through POOL##lanes and
   EMPTY##lanes; the numeric suffix is the element count.  POOL<n> holds
   the input values, EMPTY<n> zero-fills a result array.  */
#define POOL2 {0.0, 1.0}
#define POOL4 {0.0, 1.0, 2.0, 3.0}
#define EMPTY2 {0.0, 0.0}
#define EMPTY4 {0.0, 0.0, 0.0, 0.0}

/* Define test_f<size> (void), which runs the four wrappers generated by
   BUILD_VARS for this element width and checks their results.

   size   element width in bits.
   lanes  element count of the local arrays; also the number of elements
          checked for the q variants (lanes / 2 for the non-q ones).

   Each wrapper is called with its own zero-initialised result array and
   with POOL as both IN1 and IN2.  A result element passes when
   fabs (res[i] + pool[i]) does not exceed DELTA; otherwise abort () is
   called.  The empty asm statements carry a "memory" clobber and sit
   around the calls so the compiler cannot assume anything about memory
   contents across them.  */
#define BUILD_TEST(size, lanes)                                 \
static void                                                     \
test_f##size (void)                                             \
{                                                               \
  int i;                                                        \
  float##size##_t pool[lanes] = POOL##lanes;                    \
  float##size##_t res[lanes] = EMPTY##lanes;                    \
  float##size##_t res2[lanes] = EMPTY##lanes;                   \
  float##size##_t res3[lanes] = EMPTY##lanes;                   \
  float##size##_t res4[lanes] = EMPTY##lanes;                   \
                                                                \
  /* Forcefully avoid optimization.  */                         \
  asm volatile ("" : : : "memory");                             \
  test_vfms_lane_f##size (res, pool, pool);                     \
  asm volatile ("" : :"Q" (res) : "memory");                    \
  for (i = 0; i < lanes / 2; i++)                               \
    if (fabs (res[i] + pool[i]) > DELTA)                        \
      abort ();                                                 \
                                                                \
  /* Forcefully avoid optimization.  */                         \
  test_vfmsq_lane_f##size (res2, pool, pool);                   \
  asm volatile ("" : :"Q" (res2) : "memory");                   \
  for (i = 0; i < lanes; i++)                                   \
    if (fabs (res2[i] + pool[i]) > DELTA)                       \
      abort ();                                                 \
                                                                \
  /* Forcefully avoid optimization.  */                         \
  test_vfms_laneq_f##size (res3, pool, pool);                   \
  asm volatile ("" : :"Q" (res3) : "memory");                   \
  for (i = 0; i < lanes / 2; i++)                               \
    if (fabs (res3[i] + pool[i]) > DELTA)                       \
      abort ();                                                 \
                                                                \
  /* Forcefully avoid optimization.  */                         \
  test_vfmsq_laneq_f##size (res4, pool, pool);                  \
  asm volatile ("" : :"Q" (res4) : "memory");                   \
  for (i = 0; i < lanes; i++)                                   \
    if (fabs (res4[i] + pool[i]) > DELTA)                       \
      abort ();                                                 \
}

89 BUILD_TEST (32, 4)
90 BUILD_TEST (64, 2)
91 
/* Run the 32-bit and 64-bit tests.  Each test calls abort () on a failed
   check, so returning 0 means every check passed.  ARGC and ARGV are
   unused.  */
int
main (int argc, char **argv)
{
  test_f32 ();
  test_f64 ();
  return 0;
}

/* vfms_laneq_f32.
   vfms_lane_f32.  */
/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s\\\[\[0-9\]+\\\]" 2 } } */

/* vfmsq_lane_f32.
   vfmsq_laneq_f32.  */
/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s\\\[\[0-9\]+\\\]" 2 } } */

/* vfms_lane_f64.  */
/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 } } */

/* vfmsq_lane_f64.
   vfms_laneq_f64.
   vfmsq_laneq_f64.  */
/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d\\\[\[0-9\]+\\\]" 3 } } */
