| xref: /linux/arch/m68k/fpsp040/satan.S (revision e00d82d0)
|
|	satan.sa 3.3 12/19/90
|
|	The entry point satan computes the arctangent of an
|	input value. satand does the same except the input value is a
|	denormalized number.
|
|	Input: Double-extended value in memory location pointed to by address
|		register a0.
|
|	Output:	Arctan(X) returned in floating-point register Fp0.
|
|	Accuracy and Monotonicity: The returned result is within 2 ulps in
|		64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
|		result is subsequently rounded to double precision. The
|		result is provably monotonic in double precision.
|
|	Speed: The program satan takes approximately 160 cycles for input
|		argument X such that 1/16 <= |X| < 16. For the other arguments,
|		the program will run no worse than 10% slower.
|
|	Algorithm:
|	Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
|
|	Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
|		Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
|		of X with a bit-1 attached at the 6-th bit position. Define u
|		to be u = (X-F) / (1 + X*F).
|
|	Step 3. Approximate arctan(u) by a polynomial poly.
|
|	Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
|		calculated beforehand. Exit.
|
|	Step 5. If |X| >= 16, go to Step 7.
|
|	Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
|
|	Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
|		Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
|

|		Copyright (C) Motorola, Inc. 1990
|			All Rights Reserved
|
|       For details on the license for this file, please see the
|       file, README, in this same directory.

|satan	idnt	2,1 | Motorola 040 Floating Point Software Package

	|section	8

#include "fpsp.h"

|--CONSTANT POOL FOR satan.
|--BOUNDS1 HOLDS THE SAME TWO COMPACT-FORM THRESHOLDS (0x3FFB8000 AND
|--0x4002FFFF) THAT satan COMPARES AGAINST AS IMMEDIATES; NO INSTRUCTION
|--IN THIS FILE REFERENCES THE LABEL ITSELF.
BOUNDS1:	.long 0x3FFB8000,0x4002FFFF

|--SINGLE-PRECISION 1.0. ATANMAIN USES THE SAME BIT PATTERN AS AN
|--IMMEDIATE (fadds #0x3F800000); THE LABEL IS NOT REFERENCED IN THIS FILE.
ONE:	.long 0x3F800000

|--FILLER LONG (PRESUMABLY KEEPS THE 8-BYTE CONSTANTS BELOW ALIGNED -
|--TODO CONFIRM).
	.long 0x00000000

|--ATANA1..ATANA3: DOUBLE-PRECISION COEFFICIENTS (READ WITH fmoved/faddd/
|--fmuld) OF THE REARRANGED POLYNOMIAL U + A1*U*V*(A2 + V*(A3 + V)) USED
|--BY ATANMAIN.
ATANA3:	.long 0xBFF6687E,0x314987D8
ATANA2:	.long 0x4002AC69,0x34A26DB3

ATANA1:	.long 0xBFC2476F,0x4E1DA28E

|--ATANB1..ATANB6: DOUBLE-PRECISION COEFFICIENTS OF THE ODD POLYNOMIAL
|--X + X*Y*(B1+Y*(B2+...+Y*B6)) USED BY ATANSM (|X| < 1/16).
ATANB6:	.long 0x3FB34444,0x7F876989

ATANB5:	.long 0xBFB744EE,0x7FAF45DB
ATANB4:	.long 0x3FBC71C6,0x46940220

ATANB3:	.long 0xBFC24924,0x921872F9
ATANB2:	.long 0x3FC99999,0x99998FA9

ATANB1:	.long 0xBFD55555,0x55555555

|--ATANC1..ATANC5: DOUBLE-PRECISION COEFFICIENTS OF THE ODD POLYNOMIAL
|--X'+X'*Y*(C1+Y*(C2+...+Y*C5)), X' = -1/X, USED BY ATANBIG (|X| >= 16).
ATANC5:	.long 0xBFB70BF3,0x98539E6A

ATANC4:	.long 0x3FBC7187,0x962D1D7D
ATANC3:	.long 0xBFC24924,0x827107B8

ATANC2:	.long 0x3FC99999,0x9996263E
ATANC1:	.long 0xBFD55555,0x55555536

|--EXTENDED-PRECISION VALUES (SIGN/EXPONENT LONG, TWO MANTISSA LONGS, ONE
|--ZERO LONG). +-PI/2 ARE ADDED IN ATANBIG AND LOADED IN ATANHUGE; +-TINY
|--(EXPONENT FIELD 0x0001, MANTISSA 0x80000000 00000000) ARE SUBTRACTED
|--IN ATANHUGE.
PPIBY2:	.long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
NPIBY2:	.long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
PTINY:	.long 0x00010000,0x80000000,0x00000000,0x00000000
NTINY:	.long 0x80010000,0x80000000,0x00000000,0x00000000

|--TABLE OF ATAN(|F|) FOR THE 128 BREAKPOINTS |F| = 2^K * 1.BBBB1,
|--K = -4, ..., 3. EACH ENTRY IS 16 BYTES: AN EXTENDED-PRECISION VALUE
|--(SIGN/EXPONENT LONG + TWO MANTISSA LONGS) FOLLOWED BY ONE ZERO LONG.
|--ATANMAIN FORMS THE BYTE OFFSET AS 16 * (16*(K+4) + BBBB), SO THE
|--ENTRIES ARE GROUPED 16 PER EXPONENT K, IN INCREASING |F|.
ATANTBL:
|--K = -4
	.long	0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
	.long	0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
	.long	0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
	.long	0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
	.long	0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
	.long	0x3FFB0000,0xAB98E943,0x62765619,0x00000000
	.long	0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
	.long	0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
	.long	0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
	.long	0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
	.long	0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
	.long	0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
	.long	0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
	.long	0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
	.long	0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
	.long	0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
|--K = -3
	.long	0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
	.long	0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
	.long	0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
	.long	0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
	.long	0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
	.long	0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
	.long	0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
	.long	0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
	.long	0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
	.long	0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
	.long	0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
	.long	0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
	.long	0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
	.long	0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
	.long	0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
	.long	0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
|--K = -2
	.long	0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
	.long	0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
	.long	0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
	.long	0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
	.long	0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
	.long	0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
	.long	0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
	.long	0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
	.long	0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
	.long	0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
	.long	0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
	.long	0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
	.long	0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
	.long	0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
	.long	0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
	.long	0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
|--K = -1
	.long	0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
	.long	0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
	.long	0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
	.long	0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
	.long	0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
	.long	0x3FFE0000,0x97731420,0x365E538C,0x00000000
	.long	0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
	.long	0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
	.long	0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
	.long	0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
	.long	0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
	.long	0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
	.long	0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
	.long	0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
	.long	0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
	.long	0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
|--K = 0
	.long	0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
	.long	0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
	.long	0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
	.long	0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
	.long	0x3FFE0000,0xE8771129,0xC4353259,0x00000000
	.long	0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
	.long	0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
	.long	0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
	.long	0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
	.long	0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
	.long	0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
	.long	0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
	.long	0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
	.long	0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
	.long	0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
	.long	0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
|--K = 1
	.long	0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
	.long	0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
	.long	0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
	.long	0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
	.long	0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
	.long	0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
	.long	0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
	.long	0x3FFF0000,0x9F100575,0x006CC571,0x00000000
	.long	0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
	.long	0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
	.long	0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
	.long	0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
	.long	0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
	.long	0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
	.long	0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
	.long	0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
|--K = 2
	.long	0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
	.long	0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
	.long	0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
	.long	0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
	.long	0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
	.long	0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
	.long	0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
	.long	0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
	.long	0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
	.long	0x3FFF0000,0xB525529D,0x562246BD,0x00000000
	.long	0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
	.long	0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
	.long	0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
	.long	0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
	.long	0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
	.long	0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
|--K = 3
	.long	0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
	.long	0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
	.long	0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
	.long	0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
	.long	0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
	.long	0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
	.long	0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
	.long	0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
	.long	0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
	.long	0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
	.long	0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
	.long	0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
	.long	0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
	.long	0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
	.long	0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
	.long	0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000

|--SCRATCH LAYOUT, ADDRESSED RELATIVE TO a6. FP_SCR1/FP_SCR2 COME FROM
|--fpsp.h (NOT VISIBLE HERE).
|--X HOLDS A COPY OF THE ARGUMENT (LATER F, OR X' = -1/X):
|--  X        SIGN AND EXPONENT WORD
|--  XDCARE   THE WORD AFTER IT, CLEARED BEFORE X IS RE-READ
|--  XFRAC    UPPER 32 MANTISSA BITS
|--  XFRACLO  LOWER 32 MANTISSA BITS
	.set	X,FP_SCR1
	.set	XDCARE,X+2
	.set	XFRAC,X+4
	.set	XFRACLO,X+8

|--ATANF RECEIVES THE TABLE ENTRY SIGN(F)*ATAN(|F|), ONE LONG AT A TIME.
	.set	ATANF,FP_SCR2
	.set	ATANFHI,ATANF+4
	.set	ATANFLO,ATANF+8


	| xref	t_frcinx
	|xref	t_extdnrm

2301da177e4SLinus Torvalds	.global	satand
2311da177e4SLinus Torvaldssatand:
2321da177e4SLinus Torvalds|--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
2331da177e4SLinus Torvalds
2341da177e4SLinus Torvalds	bra		t_extdnrm
2351da177e4SLinus Torvalds
2361da177e4SLinus Torvalds	.global	satan
2371da177e4SLinus Torvaldssatan:
2381da177e4SLinus Torvalds|--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
2391da177e4SLinus Torvalds
2401da177e4SLinus Torvalds	fmovex		(%a0),%fp0	| ...LOAD INPUT
2411da177e4SLinus Torvalds
2421da177e4SLinus Torvalds	movel		(%a0),%d0
2431da177e4SLinus Torvalds	movew		4(%a0),%d0
2441da177e4SLinus Torvalds	fmovex		%fp0,X(%a6)
2451da177e4SLinus Torvalds	andil		#0x7FFFFFFF,%d0
2461da177e4SLinus Torvalds
2471da177e4SLinus Torvalds	cmpil		#0x3FFB8000,%d0		| ...|X| >= 1/16?
2481da177e4SLinus Torvalds	bges		ATANOK1
2491da177e4SLinus Torvalds	bra		ATANSM
2501da177e4SLinus Torvalds
2511da177e4SLinus TorvaldsATANOK1:
2521da177e4SLinus Torvalds	cmpil		#0x4002FFFF,%d0		| ...|X| < 16 ?
2531da177e4SLinus Torvalds	bles		ATANMAIN
2541da177e4SLinus Torvalds	bra		ATANBIG
2551da177e4SLinus Torvalds
2561da177e4SLinus Torvalds
|--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
|--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
|--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
|--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
|--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
|--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
|--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
|--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDDEN UNDER THE
|--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
|--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
|--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
|--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
|--WILL INVOLVE A VERY LONG POLYNOMIAL.

|--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
|--WE CHOOSE F TO BE +-2^K * 1.BBBB1
|--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
|--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
|--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
|-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).

|--ON ENTRY: FP0 = X, X(a6) = COPY OF X, d0 = |X| IN COMPACT FORM.
|--USES d0, a1, FP1, FP2 AND THE ATANF SCRATCH AREA; d2 IS PRESERVED.

ATANMAIN:

	clrw		XDCARE(%a6)		| ...CLEAN UP X JUST IN CASE
	andil		#0xF8000000,XFRAC(%a6)	| ...FIRST 5 BITS
	oril		#0x04000000,XFRAC(%a6)	| ...SET 6-TH BIT TO 1
	clrl		XFRACLO(%a6)		| ...LOCATION OF X IS NOW F

	fmovex		%fp0,%fp1			| ...FP1 IS X
	fmulx		X(%a6),%fp1		| ...FP1 IS X*F, NOTE THAT X*F > 0
	fsubx		X(%a6),%fp0		| ...FP0 IS X-F
	fadds		#0x3F800000,%fp1		| ...FP1 IS 1 + X*F
	fdivx		%fp1,%fp0			| ...FP0 IS U = (X-F)/(1+X*F)

|--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|),
|--CREATE ATAN(F) AND STORE IT IN ATANF.
|--ONLY d2 IS SAVED AND RESTORED HERE; NO FLOATING-POINT REGISTER IS.

	movel		%d2,-(%a7)	| ...SAVE d2 TEMPORARILY
	movel		%d0,%d2		| ...THE EXPO AND 16 BITS OF X
	andil		#0x00007800,%d0	| ...4 VARYING BITS OF F'S FRACTION
	andil		#0x7FFF0000,%d2	| ...EXPONENT OF F
	subil		#0x3FFB0000,%d2	| ...K+4
	asrl		#1,%d2
	addl		%d2,%d0		| ...THE 7 BITS IDENTIFYING F
	asrl		#7,%d0		| ...INDEX INTO TBL OF ATAN(|F|), 16 BYTES PER ENTRY
	lea		ATANTBL,%a1
	addal		%d0,%a1		| ...ADDRESS OF ATAN(|F|)
	movel		(%a1)+,ATANF(%a6)
	movel		(%a1)+,ATANFHI(%a6)
	movel		(%a1)+,ATANFLO(%a6)	| ...ATANF IS NOW ATAN(|F|)
	movel		X(%a6),%d0		| ...LOAD SIGN AND EXPO. AGAIN
	andil		#0x80000000,%d0	| ...SIGN(F)
	orl		%d0,ATANF(%a6)	| ...ATANF IS NOW SIGN(F)*ATAN(|F|)
	movel		(%a7)+,%d2	| ...RESTORE d2

|--THAT'S ALL I HAVE TO DO FOR NOW,
|--BUT ALAS, THE DIVIDE IS STILL CRANKING!

|--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
|--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
|--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
|--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
|--WHAT WE HAVE HERE IS MERELY	A1 = A3, A2 = A1/A3, A3 = A2/A3.
|--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
|--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED


	fmovex		%fp0,%fp1
	fmulx		%fp1,%fp1		| ...FP1 IS V = U*U
	fmoved		ATANA3,%fp2
	faddx		%fp1,%fp2		| ...A3+V
	fmulx		%fp1,%fp2		| ...V*(A3+V)
	fmulx		%fp0,%fp1		| ...U*V
	faddd		ATANA2,%fp2	| ...A2+V*(A3+V)
	fmuld		ATANA1,%fp1	| ...A1*U*V
	fmulx		%fp2,%fp1		| ...A1*U*V*(A2+V*(A3+V))

	faddx		%fp1,%fp0		| ...ATAN(U), FP1 RELEASED
	fmovel		%d1,%FPCR		|restore users exceptions
	faddx		ATANF(%a6),%fp0	| ...ATAN(X)
	bra		t_frcinx

ATANSM:
|--|X| < 1/16
|--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
|--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
|--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
|--WHERE Y = X*X, AND Z = Y*Y.
|--ON ENTRY: FP0 = X, X(a6) = COPY OF X, d0 = |X| IN COMPACT FORM.
|--USES FP1, FP2 AND FP3.

	cmpil		#0x3FD78000,%d0	| ...|X| >= 2^(-40)?
	blt		ATANTINY
|--COMPUTE POLYNOMIAL
	fmulx		%fp0,%fp0	| ...FP0 IS Y = X*X


	clrw		XDCARE(%a6)

	fmovex		%fp0,%fp1
	fmulx		%fp1,%fp1		| ...FP1 IS Z = Y*Y

	fmoved		ATANB6,%fp2
	fmoved		ATANB5,%fp3

	fmulx		%fp1,%fp2		| ...Z*B6
	fmulx		%fp1,%fp3		| ...Z*B5

	faddd		ATANB4,%fp2	| ...B4+Z*B6
	faddd		ATANB3,%fp3	| ...B3+Z*B5

	fmulx		%fp1,%fp2		| ...Z*(B4+Z*B6)
	fmulx		%fp3,%fp1		| ...Z*(B3+Z*B5)

	faddd		ATANB2,%fp2	| ...B2+Z*(B4+Z*B6)
	faddd		ATANB1,%fp1	| ...B1+Z*(B3+Z*B5)

	fmulx		%fp0,%fp2		| ...Y*(B2+Z*(B4+Z*B6))
	fmulx		X(%a6),%fp0		| ...X*Y

	faddx		%fp2,%fp1		| ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]


	fmulx		%fp1,%fp0	| ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])

	fmovel		%d1,%FPCR		|restore users exceptions
	faddx		X(%a6),%fp0	| ...ATAN(X) = X + X*Y*(...)

	bra		t_frcinx

ATANTINY:
|--|X| < 2^(-40), ATAN(X) = X
|--THE SAVED COPY OF X IS RELOADED AFTER FPCR HAS BEEN RESTORED FROM d1,
|--SO THE LAST FLOATING-POINT INSTRUCTION RUNS UNDER THE USER'S SETTINGS.
	clrw		XDCARE(%a6)

	fmovel		%d1,%FPCR		|restore users exceptions
	fmovex		X(%a6),%fp0	|last inst - possible exception set

	bra		t_frcinx

ATANBIG:
|--IF |X| > 2^(100), RETURN	SIGN(X)*(PI/2 - TINY). OTHERWISE,
|--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
|--ON ENTRY: FP0 = X, d0 = |X| IN COMPACT FORM, a0 STILL POINTS TO X.
|--USES FP1, FP2, FP3 AND OVERWRITES X(a6) WITH X' = -1/X.
	cmpil		#0x40638000,%d0
	bgt		ATANHUGE

|--APPROXIMATE ATAN(-1/X) BY
|--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
|--THIS CAN BE RE-WRITTEN AS
|--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.

	fmoves		#0xBF800000,%fp1	| ...LOAD -1
	fdivx		%fp0,%fp1		| ...FP1 IS -1/X


|--DIVIDE IS STILL CRANKING

	fmovex		%fp1,%fp0		| ...FP0 IS X'
	fmulx		%fp0,%fp0		| ...FP0 IS Y = X'*X'
	fmovex		%fp1,X(%a6)		| ...X IS REALLY X'

	fmovex		%fp0,%fp1
	fmulx		%fp1,%fp1		| ...FP1 IS Z = Y*Y

	fmoved		ATANC5,%fp3
	fmoved		ATANC4,%fp2

	fmulx		%fp1,%fp3		| ...Z*C5
	fmulx		%fp1,%fp2		| ...Z*C4

	faddd		ATANC3,%fp3	| ...C3+Z*C5
	faddd		ATANC2,%fp2	| ...C2+Z*C4

	fmulx		%fp3,%fp1		| ...Z*(C3+Z*C5), FP3 RELEASED
	fmulx		%fp0,%fp2		| ...Y*(C2+Z*C4)

	faddd		ATANC1,%fp1	| ...C1+Z*(C3+Z*C5)
	fmulx		X(%a6),%fp0		| ...X'*Y

	faddx		%fp2,%fp1		| ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]


	fmulx		%fp1,%fp0		| ...X'*Y*([C1+Z*(C3+Z*C5)]
|					...	+[Y*(C2+Z*C4)])
	faddx		X(%a6),%fp0		| ...ATAN(X') = X' + X'*Y*(...)

	fmovel		%d1,%FPCR		|restore users exceptions

|--ADD SIGN(X)*PI/2. THE SIGN IS READ FROM THE ORIGINAL ARGUMENT AT (a0)
|--BECAUSE X(a6) NOW HOLDS X'.
	btstb		#7,(%a0)
	beqs		pos_big

neg_big:
	faddx		NPIBY2,%fp0
	bra		t_frcinx

pos_big:
	faddx		PPIBY2,%fp0
	bra		t_frcinx

ATANHUGE:
|--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
|--THE SIGN IS TAKEN FROM THE ORIGINAL ARGUMENT AT (a0). FPCR IS RELOADED
|--FROM d1 BEFORE THE SUBTRACTION, SO THAT LAST INSTRUCTION RUNS UNDER
|--THE USER'S SETTINGS (PRESUMABLY SO THE RESULT IS ROUNDED AND FLAGGED
|--INEXACT IN THE USER'S MODE - TODO CONFIRM).
	btstb		#7,(%a0)
	beqs		pos_huge

neg_huge:
	fmovex		NPIBY2,%fp0
	fmovel		%d1,%FPCR		|restore users exceptions
	fsubx		NTINY,%fp0	| ...-PI/2 - (-TINY)
	bra		t_frcinx

pos_huge:
	fmovex		PPIBY2,%fp0
	fmovel		%d1,%FPCR		|restore users exceptions
	fsubx		PTINY,%fp0	| ...PI/2 - TINY
	bra		t_frcinx

	|end