/* { dg-do run } */
/* { dg-options "-O3 --save-temps" } */
3
4 #include <arm_neon.h>
5 #include <math.h>
6 #include <stdlib.h>
7
/* Check the scalar single-precision reciprocal intrinsics.

   An initial approximation of 1/0.2 is obtained from vrecpes_f32 and
   then refined three times by multiplying it with
   vrecpss_f32 (step, value).

   Returns nonzero when the refined value is within 0.001 of 5.0, and
   zero otherwise.  */
int
test_frecps_float32_t (void)
{
  int i;
  float32_t value = 0.2;
  float32_t reciprocal = 5.0;
  float32_t step = vrecpes_f32 (value);
  /* 3 steps should give us within ~0.001 accuracy.  */
  for (i = 0; i < 3; i++)
    step = step * vrecpss_f32 (step, value);

  return fabs (step - reciprocal) < 0.001;
}
21
/* { dg-final { scan-assembler "frecpe\\ts\[0-9\]+, s\[0-9\]+" } } */
/* { dg-final { scan-assembler "frecps\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
24
/* Check the two-lane single-precision reciprocal intrinsics.

   Loads {0.2, 0.4} into a float32x2_t, takes an initial approximation of
   the per-lane reciprocals from vrecpe_f32 and refines it three times by
   multiplying it with vrecps_f32 (step, value).

   Returns 1 when every lane is within 0.001 of the expected reciprocal
   ({5.0, 2.5}), and 0 otherwise.  */
int
test_frecps_float32x2_t (void)
{
  int i;
  int ret = 1;

  const float32_t value_pool[] = {0.2, 0.4};
  const float32_t reciprocal_pool[] = {5.0, 2.5};
  float32x2_t value = vld1_f32 (value_pool);
  float32x2_t reciprocal = vld1_f32 (reciprocal_pool);

  float32x2_t step = vrecpe_f32 (value);
  /* 3 steps should give us within ~0.001 accuracy.  */
  for (i = 0; i < 3; i++)
    step = step * vrecps_f32 (step, value);

  /* Each comparison yields 0 or 1, so ret stays 1 only if all lanes pass.  */
  ret &= fabs (vget_lane_f32 (step, 0)
	       - vget_lane_f32 (reciprocal, 0)) < 0.001;
  ret &= fabs (vget_lane_f32 (step, 1)
	       - vget_lane_f32 (reciprocal, 1)) < 0.001;

  return ret;
}
48
/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2s, v\[0-9\]+.2s" } } */
/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2s, v\[0-9\]+.2s, v\[0-9\]+.2s" } } */
51
/* Check the four-lane single-precision reciprocal intrinsics.

   Loads {0.2, 0.4, 0.5, 0.8} into a float32x4_t, takes an initial
   approximation of the per-lane reciprocals from vrecpeq_f32 and refines
   it three times by multiplying it with vrecpsq_f32 (step, value).

   Returns 1 when every lane is within 0.001 of the expected reciprocal
   ({5.0, 2.5, 2.0, 1.25}), and 0 otherwise.  */
int
test_frecps_float32x4_t (void)
{
  int i;
  int ret = 1;

  const float32_t value_pool[] = {0.2, 0.4, 0.5, 0.8};
  const float32_t reciprocal_pool[] = {5.0, 2.5, 2.0, 1.25};
  float32x4_t value = vld1q_f32 (value_pool);
  float32x4_t reciprocal = vld1q_f32 (reciprocal_pool);

  float32x4_t step = vrecpeq_f32 (value);
  /* 3 steps should give us within ~0.001 accuracy.  */
  for (i = 0; i < 3; i++)
    step = step * vrecpsq_f32 (step, value);

  /* Each comparison yields 0 or 1, so ret stays 1 only if all lanes pass.  */
  ret &= fabs (vgetq_lane_f32 (step, 0)
	       - vgetq_lane_f32 (reciprocal, 0)) < 0.001;
  ret &= fabs (vgetq_lane_f32 (step, 1)
	       - vgetq_lane_f32 (reciprocal, 1)) < 0.001;
  ret &= fabs (vgetq_lane_f32 (step, 2)
	       - vgetq_lane_f32 (reciprocal, 2)) < 0.001;
  ret &= fabs (vgetq_lane_f32 (step, 3)
	       - vgetq_lane_f32 (reciprocal, 3)) < 0.001;

  return ret;
}
79
/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.4s, v\[0-9\]+.4s" } } */
/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.4s, v\[0-9\]+.4s, v\[0-9\]+.4s" } } */
82
/* Check the scalar double-precision reciprocal intrinsics.

   An initial approximation of 1/0.2 is obtained from vrecped_f64 and
   then refined three times by multiplying it with
   vrecpsd_f64 (step, value).

   Returns nonzero when the refined value is within 0.001 of 5.0, and
   zero otherwise.  */
int
test_frecps_float64_t (void)
{
  int i;
  float64_t value = 0.2;
  float64_t reciprocal = 5.0;
  float64_t step = vrecped_f64 (value);
  /* 3 steps should give us within ~0.001 accuracy.  */
  for (i = 0; i < 3; i++)
    step = step * vrecpsd_f64 (step, value);

  return fabs (step - reciprocal) < 0.001;
}
96
/* { dg-final { scan-assembler "frecpe\\td\[0-9\]+, d\[0-9\]+" } } */
/* { dg-final { scan-assembler "frecps\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
99
/* Check the two-lane double-precision reciprocal intrinsics.

   Loads {0.2, 0.4} into a float64x2_t, takes an initial approximation of
   the per-lane reciprocals from vrecpeq_f64 and refines it three times by
   multiplying it with vrecpsq_f64 (step, value).

   Returns 1 when every lane is within 0.001 of the expected reciprocal
   ({5.0, 2.5}), and 0 otherwise.  */
int
test_frecps_float64x2_t (void)
{
  int i;
  int ret = 1;

  const float64_t value_pool[] = {0.2, 0.4};
  const float64_t reciprocal_pool[] = {5.0, 2.5};
  float64x2_t value = vld1q_f64 (value_pool);
  float64x2_t reciprocal = vld1q_f64 (reciprocal_pool);

  float64x2_t step = vrecpeq_f64 (value);
  /* 3 steps should give us within ~0.001 accuracy.  */
  for (i = 0; i < 3; i++)
    step = step * vrecpsq_f64 (step, value);

  /* Each comparison yields 0 or 1, so ret stays 1 only if all lanes pass.  */
  ret &= fabs (vgetq_lane_f64 (step, 0)
	       - vgetq_lane_f64 (reciprocal, 0)) < 0.001;
  ret &= fabs (vgetq_lane_f64 (step, 1)
	       - vgetq_lane_f64 (reciprocal, 1)) < 0.001;

  return ret;
}
123
/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2d, v\[0-9\]+.2d" } } */
/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2d, v\[0-9\]+.2d, v\[0-9\]+.2d" } } */
126
/* Test driver: run each reciprocal check in turn and abort as soon as one
   of them returns zero.  Returns 0 when all checks pass.  ARGC and ARGV
   are not used.  */
int
main (int argc, char **argv)
{
  if (!test_frecps_float32_t ())
    abort ();
  if (!test_frecps_float32x2_t ())
    abort ();
  if (!test_frecps_float32x4_t ())
    abort ();
  if (!test_frecps_float64_t ())
    abort ();
  if (!test_frecps_float64x2_t ())
    abort ();

  return 0;
}
143
144