1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #ifdef __RESTRICT
31 #define restrict _Restrict
32 #else
33 #define restrict
34 #endif
35
36 void
__vatanf(int n,float * restrict x,int stridex,float * restrict y,int stridey)37 __vatanf( int n, float * restrict x, int stridex, float * restrict y, int stridey )
38 {
39 extern const double __vlibm_TBL_atan1[];
40 double conup0, conup1, conup2;
41 volatile float dummy __attribute__((unused));
42 float ansf = 0.0;
43 float f0, f1, f2;
44 float ans0, ans1, ans2;
45 float poly0, poly1, poly2;
46 float sign0, sign1, sign2;
47 int intf, intz, argcount;
48 int index0, index1, index2;
49 float z,*yaddr0,*yaddr1,*yaddr2;
50 int *pz = (int *) &z;
51 #ifdef UNROLL4
52 double conup3;
53 int index3;
54 float f3, ans3, poly3, sign3, *yaddr3;
55 #endif
56
57 /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
58 * Error = -3.08254E-18 On the interval |x| < 1/64 */
59
60 static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ;
61 static const float pone = 1.0f;
62
63 if( n <= 0 ) return; /* if no. of elements is 0 or neg, do nothing */
64 do
65 {
66 LOOP0:
67
68 intf = *(int *) x; /* upper half of x, as integer */
69 f0 = *x;
70 sign0 = pone;
71 if (intf < 0) {
72 intf = intf & ~0x80000000; /* abs(upper argument) */
73 f0 = -f0;
74 sign0 = -sign0;
75 }
76
77 if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
78 {
79 if( intf > 0x7f800000 )
80 {
81 ansf = f0- f0; /* return NaN if x=NaN*/
82 }
83 else if( intf < 0x31800000 ) /* avoid underflow for small arg */
84 {
85 dummy = 1.0e37 + f0;
86 ansf = f0;
87 }
88 else if( intf > 0x5B000000 ) /* avoid underflow for big arg */
89 {
90 index0= 2;
91 ansf = __vlibm_TBL_atan1[index0];/* pi/2 up */
92 }
93 *y = sign0*ansf; /* store answer, with sign bit */
94 x += stridex;
95 y += stridey;
96 argcount = 0; /* initialize argcount */
97 if ( --n <=0 ) break; /* we are done */
98 goto LOOP0; /* otherwise, examine next arg */
99 }
100
101 if (intf > 0x42800000) /* if(|x| > 64 */
102 {
103 f0 = -pone/f0;
104 index0 = 2; /* point to pi/2 upper, lower */
105 }
106 else if( intf >= 0x3C800000 ) /* if |x| >= (1/64)... */
107 {
108 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
109 pz[0] = intz; /* store as a float (z) */
110 f0 = (f0 - z)/(pone + f0*z);
111 index0 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
112 index0 = index0+ 4; /* skip over 0,0,pi/2,pi/2 */
113 }
114 else /* |x| < 1/64 */
115 {
116 index0 = 0; /* points to 0,0 in table */
117 }
118 yaddr0 = y; /* address to store this answer */
119 x += stridex; /* point to next arg */
120 y += stridey; /* point to next result */
121 argcount = 1; /* we now have 1 good argument */
122 if ( --n <=0 )
123 {
124 goto UNROLL; /* finish up with 1 good arg */
125 }
126
127 /*--------------------------------------------------------------------------*/
128 /*--------------------------------------------------------------------------*/
129 /*--------------------------------------------------------------------------*/
130
131 LOOP1:
132
133 intf = *(int *) x; /* upper half of x, as integer */
134 f1 = *x;
135 sign1 = pone;
136 if (intf < 0) {
137 intf = intf & ~0x80000000; /* abs(upper argument) */
138 f1 = -f1;
139 sign1 = -sign1;
140 }
141
142 if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
143 {
144 if( intf > 0x7f800000 )
145 {
146 ansf = f1 - f1; /* return NaN if x=NaN*/
147 }
148 else if( intf < 0x31800000 ) /* avoid underflow for small arg */
149 {
150 dummy = 1.0e37 + f1;
151 ansf = f1;
152 }
153 else if( intf > 0x5B000000 ) /* avoid underflow for big arg */
154 {
155 index1 = 2;
156 ansf = __vlibm_TBL_atan1[index1] ;/* pi/2 up */
157 }
158 *y = sign1 * ansf; /* store answer, with sign bit */
159 x += stridex;
160 y += stridey;
161 argcount = 1; /* we still have 1 good arg */
162 if ( --n <=0 )
163 {
164 goto UNROLL; /* finish up with 1 good arg */
165 }
166 goto LOOP1; /* otherwise, examine next arg */
167 }
168
169 if (intf > 0x42800000) /* if(|x| > 64 */
170 {
171 f1 = -pone/f1;
172 index1 = 2; /* point to pi/2 upper, lower */
173 }
174 else if( intf >= 0x3C800000 ) /* if |x| >= (1/64)... */
175 {
176 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
177 pz[0] = intz; /* store as a float (z) */
178 f1 = (f1 - z)/(pone + f1*z);
179 index1 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
180 index1 = index1 + 4; /* skip over 0,0,pi/2,pi/2 */
181 }
182 else
183 {
184 index1 = 0; /* points to 0,0 in table */
185 }
186
187 yaddr1 = y; /* address to store this answer */
188 x += stridex; /* point to next arg */
189 y += stridey; /* point to next result */
190 argcount = 2; /* we now have 2 good arguments */
191 if ( --n <=0 )
192 {
193 goto UNROLL; /* finish up with 2 good args */
194 }
195
196 /*--------------------------------------------------------------------------*/
197 /*--------------------------------------------------------------------------*/
198 /*--------------------------------------------------------------------------*/
199
200 LOOP2:
201
202 intf = *(int *) x; /* upper half of x, as integer */
203 f2 = *x;
204 sign2 = pone;
205 if (intf < 0) {
206 intf = intf & ~0x80000000; /* abs(upper argument) */
207 f2 = -f2;
208 sign2 = -sign2;
209 }
210
211 if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
212 {
213 if( intf > 0x7f800000 )
214 {
215 ansf = f2 - f2; /* return NaN if x=NaN*/
216 }
217 else if( intf < 0x31800000 ) /* avoid underflow for small arg */
218 {
219 dummy = 1.0e37 + f2;
220 ansf = f2;
221 }
222 else if( intf > 0x5B000000 ) /* avoid underflow for big arg */
223 {
224 index2 = 2;
225 ansf = __vlibm_TBL_atan1[index2] ;/* pi/2 up */
226 }
227 *y = sign2 * ansf; /* store answer, with sign bit */
228 x += stridex;
229 y += stridey;
230 argcount = 2; /* we still have 2 good args */
231 if ( --n <=0 )
232 {
233 goto UNROLL; /* finish up with 2 good args */
234 }
235 goto LOOP2; /* otherwise, examine next arg */
236 }
237
238 if (intf > 0x42800000) /* if(|x| > 64 */
239 {
240 f2 = -pone/f2;
241 index2 = 2; /* point to pi/2 upper, lower */
242 }
243 else if( intf >= 0x3C800000 ) /* if |x| >= (1/64)... */
244 {
245 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
246 pz[0] = intz; /* store as a float (z) */
247 f2 = (f2 - z)/(pone + f2*z);
248 index2 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
249 index2 = index2 + 4; /* skip over 0,0,pi/2,pi/2 */
250 }
251 else
252 {
253 index2 = 0; /* points to 0,0 in table */
254 }
255 yaddr2 = y; /* address to store this answer */
256 x += stridex; /* point to next arg */
257 y += stridey; /* point to next result */
258 argcount = 3; /* we now have 3 good arguments */
259 if ( --n <=0 )
260 {
261 goto UNROLL; /* finish up with 2 good args */
262 }
263
264
265 /*--------------------------------------------------------------------------*/
266 /*--------------------------------------------------------------------------*/
267 /*--------------------------------------------------------------------------*/
268
269 #ifdef UNROLL4
270 LOOP3:
271
272 intf = *(int *) x; /* upper half of x, as integer */
273 f3 = *x;
274 sign3 = pone;
275 if (intf < 0) {
276 intf = intf & ~0x80000000; /* abs(upper argument) */
277 f3 = -f3;
278 sign3 = -sign3;
279 }
280
281 if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
282 {
283 if( intf > 0x7f800000 )
284 {
285 ansf = f3 - f3; /* return NaN if x=NaN*/
286 }
287 else if( intf < 0x31800000 ) /* avoid underflow for small arg */
288 {
289 dummy = 1.0e37 + f3;
290 ansf = f3;
291 }
292 else if( intf > 0x5B000000 ) /* avoid underflow for big arg */
293 {
294 index3 = 2;
295 ansf = __vlibm_TBL_atan1[index3] ;/* pi/2 up */
296 }
297 *y = sign3 * ansf; /* store answer, with sign bit */
298 x += stridex;
299 y += stridey;
300 argcount = 3; /* we still have 3 good args */
301 if ( --n <=0 )
302 {
303 goto UNROLL; /* finish up with 3 good args */
304 }
305 goto LOOP3; /* otherwise, examine next arg */
306 }
307
308 if (intf > 0x42800000) /* if(|x| > 64 */
309 {
310 n3 = -pone;
311 d3 = f3;
312 f3 = n3/d3;
313 index3 = 2; /* point to pi/2 upper, lower */
314 }
315 else if( intf >= 0x3C800000 ) /* if |x| >= (1/64)... */
316 {
317 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
318 pz[0] = intz; /* store as a float (z) */
319 n3 = (f3 - z);
320 d3 = (pone + f3*z); /* get reduced argument */
321 f3 = n3/d3;
322 index3 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
323 index3 = index3 + 4; /* skip over 0,0,pi/2,pi/2 */
324 }
325 else
326 {
327 n3 = f3;
328 d3 = pone;
329 index3 = 0; /* points to 0,0 in table */
330 }
331 yaddr3 = y; /* address to store this answer */
332 x += stridex; /* point to next arg */
333 y += stridey; /* point to next result */
334 argcount = 4; /* we now have 4 good arguments */
335 if ( --n <=0 )
336 {
337 goto UNROLL; /* finish up with 3 good args */
338 }
339 #endif /* UNROLL4 */
340
341 /* here is the n-way unrolled section,
342 but we may actually have less than n
343 arguments at this point
344 */
345
346 UNROLL:
347
348 #ifdef UNROLL4
349 if (argcount == 4)
350 {
351 conup0 = __vlibm_TBL_atan1[index0];
352 conup1 = __vlibm_TBL_atan1[index1];
353 conup2 = __vlibm_TBL_atan1[index2];
354 conup3 = __vlibm_TBL_atan1[index3];
355 poly0 = p1*f0*f0*f0 + f0;
356 ans0 = sign0 * (float)(conup0 + poly0);
357 poly1 = p1*f1*f1*f1 + f1;
358 ans1 = sign1 * (float)(conup1 + poly1);
359 poly2 = p1*f2*f2*f2 + f2;
360 ans2 = sign2 * (float)(conup2 + poly2);
361 poly3 = p1*f3*f3*f3 + f3;
362 ans3 = sign3 * (float)(conup3 + poly3);
363 *yaddr0 = ans0;
364 *yaddr1 = ans1;
365 *yaddr2 = ans2;
366 *yaddr3 = ans3;
367 }
368 else
369 #endif
370 if (argcount == 3)
371 {
372 conup0 = __vlibm_TBL_atan1[index0];
373 conup1 = __vlibm_TBL_atan1[index1];
374 conup2 = __vlibm_TBL_atan1[index2];
375 poly0 = p1*f0*f0*f0 + f0;
376 poly1 = p1*f1*f1*f1 + f1;
377 poly2 = p1*f2*f2*f2 + f2;
378 ans0 = sign0 * (float)(conup0 + poly0);
379 ans1 = sign1 * (float)(conup1 + poly1);
380 ans2 = sign2 * (float)(conup2 + poly2);
381 *yaddr0 = ans0;
382 *yaddr1 = ans1;
383 *yaddr2 = ans2;
384 }
385 else
386 if (argcount == 2)
387 {
388 conup0 = __vlibm_TBL_atan1[index0];
389 conup1 = __vlibm_TBL_atan1[index1];
390 poly0 = p1*f0*f0*f0 + f0;
391 poly1 = p1*f1*f1*f1 + f1;
392 ans0 = sign0 * (float)(conup0 + poly0);
393 ans1 = sign1 * (float)(conup1 + poly1);
394 *yaddr0 = ans0;
395 *yaddr1 = ans1;
396 }
397 else
398 if (argcount == 1)
399 {
400 conup0 = __vlibm_TBL_atan1[index0];
401 poly0 = p1*f0*f0*f0 + f0;
402 ans0 = sign0 * (float)(conup0 + poly0);
403 *yaddr0 = ans0;
404 }
405
406 } while (n > 0);
407
408 }
409