1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #ifdef __RESTRICT
31 #define restrict _Restrict
32 #else
33 #define restrict
34 #endif
35 
36 void
__vatanf(int n,float * restrict x,int stridex,float * restrict y,int stridey)37 __vatanf( int n, float * restrict x, int stridex, float * restrict y, int stridey )
38 {
39   extern const double __vlibm_TBL_atan1[];
40   double  conup0, conup1, conup2;
41   volatile float dummy __attribute__((unused));
42   float ansf = 0.0;
43   float f0, f1, f2;
44   float ans0, ans1, ans2;
45   float poly0, poly1, poly2;
46   float sign0, sign1, sign2;
47   int intf, intz, argcount;
48   int index0, index1, index2;
49   float z,*yaddr0,*yaddr1,*yaddr2;
50   int *pz = (int *) &z;
51 #ifdef UNROLL4
52   double conup3;
53   int index3;
54   float f3, ans3, poly3, sign3, *yaddr3;
55 #endif
56 
57 /*    Power series  atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
58  *    Error =  -3.08254E-18   On the interval  |x| < 1/64 */
59 
60   static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ;
61   static const float pone = 1.0f;
62 
63   if( n <= 0 ) return;		/* if no. of elements is 0 or neg, do nothing */
64   do
65   {
66   LOOP0:
67 
68 	intf     = *(int *) x;		/* upper half of x, as integer */
69 	f0 = *x;
70 	sign0 = pone;
71     	if (intf < 0) {
72     		intf = intf & ~0x80000000; /* abs(upper argument) */
73 		f0 = -f0;
74 		sign0 = -sign0;
75 	}
76 
77     if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
78     {
79       if( intf > 0x7f800000 )
80       {
81 	ansf  = f0- f0; 				/* return NaN if x=NaN*/
82       }
83       else if( intf < 0x31800000 ) 		/* avoid underflow for small arg */
84       {
85         dummy = 1.0e37 + f0;
86 	ansf  = f0;
87       }
88       else if( intf > 0x5B000000 )		/* avoid underflow for big arg  */
89       {
90         index0= 2;
91         ansf  = __vlibm_TBL_atan1[index0];/* pi/2 up */
92       }
93       *y      = sign0*ansf;		/* store answer, with sign bit 	*/
94       x      += stridex;
95       y      += stridey;
96       argcount = 0;				/* initialize argcount		*/
97       if ( --n <=0 ) break;			/* we are done 			*/
98       goto LOOP0;				/* otherwise, examine next arg  */
99     }
100 
101     if (intf > 0x42800000)			/* if(|x| > 64               	*/
102     {
103     f0 = -pone/f0;
104 	index0 = 2; 				/* point to pi/2 upper, lower	*/
105     }
106     else if( intf >= 0x3C800000 )		/* if |x| >= (1/64)... 		*/
107     {
108       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
109       pz[0]  = intz;				/* store as a float (z)		*/
110     f0 = (f0 - z)/(pone + f0*z);
111 	index0 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
112 	index0 = index0+ 4;			/* skip over 0,0,pi/2,pi/2	*/
113     }
114     else					/* |x| < 1/64 */
115     {
116 	index0   = 0;				/* points to 0,0 in table	*/
117     }
118     yaddr0   = y;				/* address to store this answer */
119     x       += stridex;				/* point to next arg		*/
120     y       += stridey;				/* point to next result		*/
121     argcount = 1;				/* we now have 1 good argument  */
122     if ( --n <=0 )
123     {
124       goto UNROLL;				/* finish up with 1 good arg 	*/
125     }
126 
127     /*--------------------------------------------------------------------------*/
128     /*--------------------------------------------------------------------------*/
129     /*--------------------------------------------------------------------------*/
130 
131   LOOP1:
132 
133 	intf     = *(int *) x;		/* upper half of x, as integer */
134 	f1 = *x;
135 	sign1 = pone;
136     	if (intf < 0) {
137     		intf = intf & ~0x80000000; /* abs(upper argument) */
138 		f1 = -f1;
139 		sign1 = -sign1;
140 	}
141 
142     if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
143     {
144       if( intf > 0x7f800000 )
145       {
146 	ansf   = f1 - f1;			/* return NaN if x=NaN*/
147       }
148       else if( intf < 0x31800000 ) 		/* avoid underflow for small arg */
149       {
150         dummy = 1.0e37 + f1;
151 	ansf   = f1;
152       }
153       else if( intf > 0x5B000000 )		/* avoid underflow for big arg  */
154       {
155         index1 = 2;
156         ansf   = __vlibm_TBL_atan1[index1] ;/* pi/2 up */
157       }
158       *y      = sign1 * ansf;		/* store answer, with sign bit 	*/
159       x      += stridex;
160       y      += stridey;
161       argcount = 1;				/* we still have 1 good arg 	*/
162       if ( --n <=0 )
163       {
164         goto UNROLL;				/* finish up with 1 good arg 	*/
165       }
166       goto LOOP1;				/* otherwise, examine next arg  */
167     }
168 
169     if (intf > 0x42800000)			/* if(|x| > 64               	*/
170     {
171     f1 = -pone/f1;
172       index1 = 2; 				/* point to pi/2 upper, lower	*/
173     }
174     else if( intf >= 0x3C800000 )		/* if |x| >= (1/64)... 		*/
175     {
176       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
177       pz[0]  = intz;				/* store as a float (z)		*/
178     f1 = (f1 - z)/(pone + f1*z);
179       index1 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
180       index1 = index1 + 4;			/* skip over 0,0,pi/2,pi/2	*/
181     }
182     else
183     {
184 	index1   = 0;				/* points to 0,0 in table	*/
185     }
186 
187     yaddr1   = y;				/* address to store this answer */
188     x       += stridex;				/* point to next arg		*/
189     y       += stridey;				/* point to next result		*/
190     argcount = 2;				/* we now have 2 good arguments */
191     if ( --n <=0 )
192     {
193       goto UNROLL;				/* finish up with 2 good args 	*/
194     }
195 
196     /*--------------------------------------------------------------------------*/
197     /*--------------------------------------------------------------------------*/
198     /*--------------------------------------------------------------------------*/
199 
200   LOOP2:
201 
202 	intf     = *(int *) x;		/* upper half of x, as integer */
203 	f2 = *x;
204 	sign2 = pone;
205     	if (intf < 0) {
206     		intf = intf & ~0x80000000; /* abs(upper argument) */
207 		f2 = -f2;
208 		sign2 = -sign2;
209 	}
210 
211     if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
212     {
213       if( intf > 0x7f800000 )
214       {
215 	ansf   = f2 - f2;			/* return NaN if x=NaN*/
216       }
217       else if( intf < 0x31800000 ) 		/* avoid underflow for small arg */
218       {
219         dummy = 1.0e37 + f2;
220 	ansf   = f2;
221       }
222       else if( intf > 0x5B000000 )		/* avoid underflow for big arg  */
223       {
224         index2 = 2;
225         ansf   = __vlibm_TBL_atan1[index2] ;/* pi/2 up */
226       }
227       *y      = sign2 * ansf;		/* store answer, with sign bit 	*/
228       x      += stridex;
229       y      += stridey;
230       argcount = 2;				/* we still have 2 good args 	*/
231       if ( --n <=0 )
232       {
233         goto UNROLL;				/* finish up with 2 good args 	*/
234       }
235       goto LOOP2;				/* otherwise, examine next arg  */
236     }
237 
238     if (intf > 0x42800000)			/* if(|x| > 64               	*/
239     {
240     f2 = -pone/f2;
241       index2 = 2; 				/* point to pi/2 upper, lower	*/
242     }
243     else if( intf >= 0x3C800000 )		/* if |x| >= (1/64)... 		*/
244     {
245       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
246       pz[0]  = intz;				/* store as a float (z)		*/
247     f2 = (f2 - z)/(pone + f2*z);
248       index2 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
249       index2 = index2 + 4;			/* skip over 0,0,pi/2,pi/2	*/
250     }
251     else
252     {
253 	index2   = 0;				/* points to 0,0 in table	*/
254     }
255     yaddr2   = y;				/* address to store this answer */
256     x       += stridex;				/* point to next arg		*/
257     y       += stridey;				/* point to next result		*/
258     argcount = 3;				/* we now have 3 good arguments */
259     if ( --n <=0 )
260     {
261       goto UNROLL;				/* finish up with 2 good args 	*/
262     }
263 
264 
265     /*--------------------------------------------------------------------------*/
266     /*--------------------------------------------------------------------------*/
267     /*--------------------------------------------------------------------------*/
268 
269 #ifdef UNROLL4
270   LOOP3:
271 
272 	intf     = *(int *) x;		/* upper half of x, as integer */
273 	f3 = *x;
274 	sign3 = pone;
275     	if (intf < 0) {
276     		intf = intf & ~0x80000000; /* abs(upper argument) */
277 		f3 = -f3;
278 		sign3 = -sign3;
279 	}
280 
281     if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
282     {
283       if( intf > 0x7f800000 )
284       {
285 	ansf   = f3 - f3;			/* return NaN if x=NaN*/
286       }
287       else if( intf < 0x31800000 ) 		/* avoid underflow for small arg */
288       {
289         dummy = 1.0e37 + f3;
290 	ansf   = f3;
291       }
292       else if( intf > 0x5B000000 )		/* avoid underflow for big arg  */
293       {
294         index3 = 2;
295         ansf   = __vlibm_TBL_atan1[index3] ;/* pi/2 up */
296       }
297       *y      = sign3 * ansf;		/* store answer, with sign bit 	*/
298       x      += stridex;
299       y      += stridey;
300       argcount = 3;				/* we still have 3 good args 	*/
301       if ( --n <=0 )
302       {
303         goto UNROLL;				/* finish up with 3 good args 	*/
304       }
305       goto LOOP3;				/* otherwise, examine next arg  */
306     }
307 
308     if (intf > 0x42800000)			/* if(|x| > 64               	*/
309     {
310 	n3 = -pone;
311         d3 = f3;
312     f3 = n3/d3;
313       index3 = 2; 				/* point to pi/2 upper, lower	*/
314     }
315     else if( intf >= 0x3C800000 )		/* if |x| >= (1/64)... 		*/
316     {
317       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
318       pz[0]  = intz;				/* store as a float (z)		*/
319 	n3     = (f3 - z);
320 	d3     = (pone + f3*z); 		/* get reduced argument		*/
321     f3 = n3/d3;
322       index3 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
323       index3 = index3 + 4;			/* skip over 0,0,pi/2,pi/2	*/
324     }
325     else
326     {
327 	n3 = f3;
328 	d3 = pone;
329 	index3   = 0;				/* points to 0,0 in table	*/
330     }
331     yaddr3   = y;				/* address to store this answer */
332     x       += stridex;				/* point to next arg		*/
333     y       += stridey;				/* point to next result		*/
334     argcount = 4;				/* we now have 4 good arguments */
335     if ( --n <=0 )
336     {
337       goto UNROLL;				/* finish up with 3 good args 	*/
338     }
339 #endif /* UNROLL4 */
340 
341 /* here is the n-way unrolled section,
342    but we may actually have less than n
343    arguments at this point
344 */
345 
346 UNROLL:
347 
348 #ifdef UNROLL4
349     if (argcount == 4)
350     {
351     conup0   = __vlibm_TBL_atan1[index0];
352     conup1   = __vlibm_TBL_atan1[index1];
353     conup2   = __vlibm_TBL_atan1[index2];
354     conup3   = __vlibm_TBL_atan1[index3];
355     poly0    = p1*f0*f0*f0 + f0;
356     ans0     = sign0 * (float)(conup0 + poly0);
357     poly1    = p1*f1*f1*f1 + f1;
358     ans1     = sign1 * (float)(conup1 + poly1);
359     poly2    = p1*f2*f2*f2 + f2;
360     ans2     = sign2 * (float)(conup2 + poly2);
361     poly3    = p1*f3*f3*f3 + f3;
362     ans3     = sign3 * (float)(conup3 + poly3);
363     *yaddr0  = ans0;
364     *yaddr1  = ans1;
365     *yaddr2  = ans2;
366     *yaddr3  = ans3;
367     }
368     else
369 #endif
370     if (argcount == 3)
371     {
372     conup0   = __vlibm_TBL_atan1[index0];
373     conup1   = __vlibm_TBL_atan1[index1];
374     conup2   = __vlibm_TBL_atan1[index2];
375     poly0    = p1*f0*f0*f0 + f0;
376     poly1    = p1*f1*f1*f1 + f1;
377     poly2    = p1*f2*f2*f2 + f2;
378     ans0     = sign0 * (float)(conup0 + poly0);
379     ans1     = sign1 * (float)(conup1 + poly1);
380     ans2     = sign2 * (float)(conup2 + poly2);
381     *yaddr0  = ans0;
382     *yaddr1  = ans1;
383     *yaddr2  = ans2;
384     }
385     else
386     if (argcount == 2)
387     {
388     conup0   = __vlibm_TBL_atan1[index0];
389     conup1   = __vlibm_TBL_atan1[index1];
390     poly0    = p1*f0*f0*f0 + f0;
391     poly1    = p1*f1*f1*f1 + f1;
392     ans0     = sign0 * (float)(conup0 + poly0);
393     ans1     = sign1 * (float)(conup1 + poly1);
394     *yaddr0  = ans0;
395     *yaddr1  = ans1;
396     }
397     else
398     if (argcount == 1)
399     {
400     conup0   = __vlibm_TBL_atan1[index0];
401     poly0    = p1*f0*f0*f0 + f0;
402     ans0     = sign0 * (float)(conup0 + poly0);
403     *yaddr0  = ans0;
404      }
405 
406   }  while (n > 0);
407 
408 }
409