/* { dg-do run } */
/* { dg-options "-O3 --save-temps" } */

#include <arm_neon.h>
#include <math.h>
#include <stdlib.h>

8 int
test_frecps_float32_t(void)9 test_frecps_float32_t (void)
10 {
11   int i;
12   float32_t value = 0.2;
13   float32_t reciprocal = 5.0;
14   float32_t step = vrecpes_f32 (value);
15   /* 3 steps should give us within ~0.001 accuracy.  */
16   for (i = 0; i < 3; i++)
17     step = step * vrecpss_f32 (step, value);
18 
19   return fabs (step - reciprocal) < 0.001;
20 }

/* { dg-final { scan-assembler "frecpe\\ts\[0-9\]+, s\[0-9\]+" } } */
/* { dg-final { scan-assembler "frecps\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */

25 int
test_frecps_float32x2_t(void)26 test_frecps_float32x2_t (void)
27 {
28   int i;
29   int ret = 1;
30 
31   const float32_t value_pool[] = {0.2, 0.4};
32   const float32_t reciprocal_pool[] = {5.0, 2.5};
33   float32x2_t value = vld1_f32 (value_pool);
34   float32x2_t reciprocal = vld1_f32 (reciprocal_pool);
35 
36   float32x2_t step = vrecpe_f32 (value);
37   /* 3 steps should give us within ~0.001 accuracy.  */
38   for (i = 0; i < 3; i++)
39     step = step * vrecps_f32 (step, value);
40 
41   ret &= fabs (vget_lane_f32 (step, 0)
42 	       - vget_lane_f32 (reciprocal, 0)) < 0.001;
43   ret &= fabs (vget_lane_f32 (step, 1)
44 	       - vget_lane_f32 (reciprocal, 1)) < 0.001;
45 
46   return ret;
47 }

/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2s, v\[0-9\]+.2s" } } */
/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2s, v\[0-9\]+.2s, v\[0-9\]+.2s" } } */

52 int
test_frecps_float32x4_t(void)53 test_frecps_float32x4_t (void)
54 {
55   int i;
56   int ret = 1;
57 
58   const float32_t value_pool[] = {0.2, 0.4, 0.5, 0.8};
59   const float32_t reciprocal_pool[] = {5.0, 2.5, 2.0, 1.25};
60   float32x4_t value = vld1q_f32 (value_pool);
61   float32x4_t reciprocal = vld1q_f32 (reciprocal_pool);
62 
63   float32x4_t step = vrecpeq_f32 (value);
64   /* 3 steps should give us within ~0.001 accuracy.  */
65   for (i = 0; i < 3; i++)
66     step = step * vrecpsq_f32 (step, value);
67 
68   ret &= fabs (vgetq_lane_f32 (step, 0)
69 	       - vgetq_lane_f32 (reciprocal, 0)) < 0.001;
70   ret &= fabs (vgetq_lane_f32 (step, 1)
71 	       - vgetq_lane_f32 (reciprocal, 1)) < 0.001;
72   ret &= fabs (vgetq_lane_f32 (step, 2)
73 	       - vgetq_lane_f32 (reciprocal, 2)) < 0.001;
74   ret &= fabs (vgetq_lane_f32 (step, 3)
75 	       - vgetq_lane_f32 (reciprocal, 3)) < 0.001;
76 
77   return ret;
78 }

/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.4s, v\[0-9\]+.4s" } } */
/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.4s, v\[0-9\]+.4s, v\[0-9\]+.4s" } } */

83 int
test_frecps_float64_t(void)84 test_frecps_float64_t (void)
85 {
86   int i;
87   float64_t value = 0.2;
88   float64_t reciprocal = 5.0;
89   float64_t step = vrecped_f64 (value);
90   /* 3 steps should give us within ~0.001 accuracy.  */
91   for (i = 0; i < 3; i++)
92     step = step * vrecpsd_f64 (step, value);
93 
94   return fabs (step - reciprocal) < 0.001;
95 }

/* { dg-final { scan-assembler "frecpe\\td\[0-9\]+, d\[0-9\]+" } } */
/* { dg-final { scan-assembler "frecps\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */

100 int
test_frecps_float64x2_t(void)101 test_frecps_float64x2_t (void)
102 {
103   int i;
104   int ret = 1;
105 
106   const float64_t value_pool[] = {0.2, 0.4};
107   const float64_t reciprocal_pool[] = {5.0, 2.5};
108   float64x2_t value = vld1q_f64 (value_pool);
109   float64x2_t reciprocal = vld1q_f64 (reciprocal_pool);
110 
111   float64x2_t step = vrecpeq_f64 (value);
112   /* 3 steps should give us within ~0.001 accuracy.  */
113   for (i = 0; i < 3; i++)
114     step = step * vrecpsq_f64 (step, value);
115 
116   ret &= fabs (vgetq_lane_f64 (step, 0)
117 	       - vgetq_lane_f64 (reciprocal, 0)) < 0.001;
118   ret &= fabs (vgetq_lane_f64 (step, 1)
119 	       - vgetq_lane_f64 (reciprocal, 1)) < 0.001;
120 
121   return ret;
122 }

/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2d, v\[0-9\]+.2d" } } */
/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2d, v\[0-9\]+.2d, v\[0-9\]+.2d" } } */

/* Run every reciprocal test in turn.

   Each test returns zero on failure; the first failure calls abort ()
   so the run is reported as failed.  Returns 0 when all tests pass.
   ARGC and ARGV are not used.  */
int
main (int argc, char **argv)
{
  if (!test_frecps_float32_t ())
    abort ();
  if (!test_frecps_float32x2_t ())
    abort ();
  if (!test_frecps_float32x4_t ())
    abort ();
  if (!test_frecps_float64_t ())
    abort ();
  if (!test_frecps_float64x2_t ())
    abort ();

  return 0;
}