/* { dg-do run } */
/* { dg-options "-O3 --save-temps" } */

#include <arm_neon.h>

/* Absolute tolerance used when comparing a computed lane with the value
   it is expected to hold.  */
#define DELTA 0.0001
/* Prototypes written out by hand; no library header is included for
   them in this file.  */
extern void abort (void);
extern double fabs (double);

/* Define the static helper test_vmul<q1>_lane<q2>_f<size>.

   q1         - "q" or empty; selects the vld1/vst1/vmul variant used for
		RES and IN1.
   q2         - "q" or empty; selects the vld1 variant used for IN2 and
		the _lane/_laneq form of the multiply.
   size       - element width, pasted into the float<size>_t type names
		and the _f<size> intrinsic suffixes.
   in1_lanes  - lane count of the vector type used for RES and IN1.
   in2_lanes  - lane count of the vector type used for IN2.

   The helper loads IN1 into a vector, multiplies it by a single lane
   taken from IN2 and stores the product to RES.  When the IN2 vector has
   more than one lane it is loaded from IN2 and lane 1 is used; otherwise
   it is loaded from IN2 + 1 and lane 0 is used, so in both cases the
   multiplier is the element IN2[1].

   The initial load of RES into A is overwritten before it is used; it is
   deliberately left in place so the code the compiler sees is
   unchanged.  */
#define TEST_VMUL(q1, q2, size, in1_lanes, in2_lanes)			\
static void								\
test_vmul##q1##_lane##q2##_f##size (float##size##_t * res,		\
				   const float##size##_t *in1,		\
				   const float##size##_t *in2)		\
{									\
  float##size##x##in1_lanes##_t a = vld1##q1##_f##size (res);		\
  float##size##x##in1_lanes##_t b = vld1##q1##_f##size (in1);		\
  float##size##x##in2_lanes##_t c;					\
  if (in2_lanes > 1)							\
    {									\
      c = vld1##q2##_f##size (in2);					\
      a = vmul##q1##_lane##q2##_f##size (b, c, 1);			\
    }									\
  else									\
    {									\
      c = vld1##q2##_f##size (in2 + 1);					\
      a = vmul##q1##_lane##q2##_f##size (b, c, 0);			\
    }									\
  vst1##q1##_f##size (res, a);						\
}

/* Instantiate TEST_VMUL for all four combinations of the two "q"
   arguments (empty or q) at element width WIDTH.  An empty "q" argument
   is paired with N_HALF_LANES and a "q" argument with N_LANES.  */
#define BUILD_VARS(width, n_lanes, n_half_lanes)		\
TEST_VMUL ( ,  , width, n_half_lanes, n_half_lanes)		\
TEST_VMUL (q,  , width, n_lanes, n_half_lanes)			\
TEST_VMUL ( , q, width, n_half_lanes, n_lanes)			\
TEST_VMUL (q, q, width, n_lanes, n_lanes)

38 BUILD_VARS (32, 4, 2)
39 BUILD_VARS (64, 2, 1)
40 
/* Array initialisers for the test data.  Element 1 of each POOL is 1.0.
   The EMPTY initialisers zero-fill the result arrays.  */
#define POOL2 {0.0, 1.0}
#define POOL4 {0.0, 1.0, 2.0, 3.0}
#define EMPTY2 {0.0, 0.0}
#define EMPTY4 {0.0, 0.0, 0.0, 0.0}

/* Define test_f<size>, which runs the four test_vmul* helpers for
   element width SIZE, each with POOL<lanes> as both inputs and its own
   zero-filled result array.

   After each call the first LANES / 2 elements (helpers with an empty
   q1) or all LANES elements (helpers with q1 = q) of the result are
   compared with the matching elements of the pool, and abort is called
   if any differs by more than DELTA.  The expected value is the pool
   itself because the helpers multiply by pool[1], which is 1.0.

   The empty asm statements are compiler barriers; their purpose is given
   by the comments next to them.  */
#define BUILD_TEST(size, lanes)					\
static void							\
test_f##size (void)						\
{								\
  int i;							\
  float##size##_t pool[lanes] = POOL##lanes;			\
  float##size##_t res[lanes] = EMPTY##lanes;			\
  float##size##_t res2[lanes] = EMPTY##lanes;			\
  float##size##_t res3[lanes] = EMPTY##lanes;			\
  float##size##_t res4[lanes] = EMPTY##lanes;			\
								\
  /* Avoid constant folding the multiplication.  */		\
  asm volatile ("" : : : "memory");				\
  test_vmul_lane_f##size (res, pool, pool);			\
  /* Avoid fusing multiplication and subtraction.  */		\
  asm volatile ("" : :"Q" (res) : "memory");			\
  for (i = 0; i < lanes / 2; i++)				\
    if (fabs (res[i] - pool[i]) > DELTA)			\
      abort ();							\
								\
  test_vmulq_lane_f##size (res2, pool, pool);			\
  /* Avoid fusing multiplication and subtraction.  */		\
  asm volatile ("" : :"Q" (res2) : "memory");			\
  for (i = 0; i < lanes; i++)					\
    if (fabs (res2[i] - pool[i]) > DELTA)			\
      abort ();							\
								\
  test_vmul_laneq_f##size (res3, pool, pool);			\
  /* Avoid fusing multiplication and subtraction.  */		\
  asm volatile ("" : :"Q" (res3) : "memory");			\
  for (i = 0; i < lanes / 2; i++)				\
    if (fabs (res3[i] - pool[i]) > DELTA)			\
      abort ();							\
								\
  test_vmulq_laneq_f##size (res4, pool, pool);			\
  /* Avoid fusing multiplication and subtraction.  */		\
  asm volatile ("" : :"Q" (res4) : "memory");			\
  for (i = 0; i < lanes; i++)					\
    if (fabs (res4[i] - pool[i]) > DELTA)			\
      abort ();							\
}

88 BUILD_TEST (32, 4)
89 BUILD_TEST (64, 2)
90 
/* Run the float32 and float64 checks.  Each check calls abort on a
   mismatch, so reaching the return statement means every comparison
   passed.  ARGC and ARGV are not used.  */
int
main (int argc, char **argv)
{
  test_f32 ();
  test_f64 ();
  return 0;
}

/* vmul_laneq_f32.
   vmul_lane_f32.  */
/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.s\\\[\[0-9\]+\\\]" 2 } } */

/* vmulq_lane_f32.
   vmulq_laneq_f32.  */
/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.s\\\[\[0-9\]+\\\]" 2 } } */

/* vmul_lane_f64.  */
/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */

/* vmul_laneq_f64.
   vmulq_lane_f64.
   vmulq_laneq_f64.  */
/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 } } */
