xref: /original-bsd/lib/libm/national/sqrt.s (revision f51da917)
1; Copyright (c) 1985, 1993
2;	The Regents of the University of California.  All rights reserved.
3;
4; %sccs.include.redist.semicolon%
5;
6;	@(#)sqrt.s	8.1 (Berkeley) 06/04/93
7;
8
9; double sqrt(x)
10; double x;
11; IEEE double precision sqrt
12; code in NSC assembly by K.C. Ng
13; 12/13/85
14;
15; Method:
16; 	Use Kahan's trick to get 8 bits initial approximation
17;	by integer shift and add/subtract. Then three Newton
18;	iterations to get down error to within one ulp. Finally
19;	twiddle the last bit to make to correctly rounded
20;	according to the rounding mode.
21;
22	.vers	2
23	.text
24	.align	2
25	.globl	_sqrt
26_sqrt:
27	enter	[r3,r4,r5,r6,r7],44
28	movl	f4,tos
29	movl	f6,tos
30	movd	2146435072,r2	; r2 = 0x7ff00000
31	movl	8(fp),f0	; f2 = x
32	movd	12(fp),r3	; r3 = high part of x
33	movd	r3,r4		; make a copy of high part of x in r4
34	andd	r2,r3		; r3 become the bias exponent of x
35	cmpd	r2,r3		; if r3 = 0x7ff00000 then x is INF or NAN
36	bne	L22
37				; to see if x is INF
38	movd	8(fp),r0	; r0 = low part of x
39	movd	r4,r1		; r1 is high part of x again
40	andd	0xfff00000,r1	; mask off the sign and exponent of x
41	ord	r0,r1		; or with low part, if 0 then x is INF
42	cmpqd	0,r1		;
43	bne	L1		; not 0; therefore x is NaN; return x.
44	cmpqd	0,r4		; now x is Inf, is it +inf?
45	blt	L1		; +INF, return x
46				; -INF, return NaN by doing 0/0
47nan:	movl	0f0.0,f0	;
48	divl	f0,f0
49	br	L1
50L22:				; now x is finite
51	cmpl	0f0.0,f0	; x = 0 ?
52	beq	L1		; return x if x is +0 or -0
53	cmpqd	0,r4		; Is x < 0 ?
54	bgt	nan		; if x < 0 return NaN
55	movqd	0,r5		; r5 == scalx initialize to zero
56	cmpqd	0,r3		; is x is subnormal ?  (r3 is the exponent)
57	bne	L21		; if x is normal, goto L21
58	movl	L30,f2		; f2 = 2**54
59	mull	f2,f0		; scale up x by 2**54
60	subd	0x1b00000,r5	; off set the scale factor by -27 in exponent
61L21:
62				; now x is normal
63				; notations:
64				;    r1 == copy of fsr
65				;    r2 == mask of e inexact enable flag
66				;    r3 == mask of i inexact flag
67				;    r4 == mask of r rounding mode
68				;    r5 == x's scale factor (already defined)
69
70	movd	0x20,r2
71	movd	0x40,r3
72	movd	0x180,r4
73	sfsr	r0		; store fsr to r0
74	movd	r0,r1		; make a copy of fsr to r1
75	bicd	[5,6,7,8],r0	; clear e,i, and set r to round to nearest
76	lfsr	r0
77
78				; begin to compute sqrt(x)
79	movl	f0,-8(fp)
80	movd	-4(fp),r0	; r0 the high part of modified x
81	lshd	-1,r0		; r0 >> 1
82	addd	0x1ff80000,r0	; add correction to r0  ...got 5 bits approx.
83	movd	r0,r6
84	lshd	-13,r6		; r6 = r0>>-15
85	andd	0x7c,r6		; obtain 4*leading 5 bits of r0
86	addrd	L29,r7		; r7 = address of L29 = table[0]
87	addd	r6,r7		; r6 = address of L29[r6] = table[r6]
88	subd	0(r7),r0	; r0 = r0 - table[r6]
89	movd	r0,-4(fp)
90	movl	-8(fp),f2	; now f2 = y approximate sqrt(f0) to 8 bits
91
92	movl	0f0.5,f6	; f6 = 0.5
93	movl	f0,f4
94	divl	f2,f4		; t = x/y
95	addl	f4,f2		; y = y + x/y
96	mull	f6,f2		; y = 0.5(y+x/y) got 17 bits approx.
97	movl	f0,f4
98	divl	f2,f4		; t = x/y
99	addl	f4,f2		; y = y + x/y
100	mull	f6,f2		; y = 0.5(y+x/y) got 35 bits approx.
101	movl	f0,f4
102	divl	f2,f4		; t = x/y
103	subl	f2,f4		; t = x/y - y
104	mull	f6,f4		; t = 0.5(x/y-y)
105	addl	f4,f2		; y = y + 0.5(x/y -y)
106				; now y approx. sqrt(x) to within 1 ulp
107
108				; twiddle last bit to force y correctly rounded
109	movd	r1,r0		; restore the old fsr
110	bicd	[6,7,8],r0	; clear inexact bit but retain inexact enable
111	ord	0x80,r0		; set rounding mode to round to zero
112	lfsr	r0
113
114	movl	f0,f4
115	divl	f2,f4		; f4 = x/y
116	sfsr	r0
117	andd	r3,r0		; get the inexact flag
118	cmpqd	0,r0
119	bne	L18
120				; if x/y exact, then ...
121	cmpl	f2,f4		; if y == x/y
122	beq	L2
123	movl	f4,-8(fp)
124	subd	1,-8(fp)
125	subcd	0,-4(fp)
126	movl	-8(fp),f4	; f4 = f4 - ulp
127L18:
128	bicd	[6],r1
129	ord	r3,r1		; set inexact flag in r1
130
131	andd	r1,r4		; r4 = the old rounding mode
132	cmpqd	0,r4		; round to nearest?
133	bne	L17
134	movl	f4,-8(fp)
135	addd	1,-8(fp)
136	addcd	0,-4(fp)
137	movl	-8(fp),f4	; f4 = f4 + ulp
138	br	L16
139L17:
140	cmpd	0x100,r4	; round to positive inf ?
141	bne	L16
142	movl	f4,-8(fp)
143	addd	1,-8(fp)
144	addcd	0,-4(fp)
145	movl	-8(fp),f4	; f4 = f4 + ulp
146
147	movl	f2,-8(fp)
148	addd	1,-8(fp)
149	addcd	0,-4(fp)
150	movl	-8(fp),f2	; f2 = f2 + ulp
151L16:
152	addl	f4,f2		; y  = y + t
153	subd	0x100000,r5	; scalx = scalx - 1
154L2:
155	movl	f2,-8(fp)
156	addd	r5,-4(fp)
157	movl	-8(fp),f0
158	lfsr	r1
159L1:
160	movl	tos,f6
161	movl	tos,f4
162	exit	[r3,r4,r5,r6,r7]
163	ret	0
164	.data
165L28:	.byte	64,40,35,41,115,113,114,116,46,99
166	.byte	9,49,46,49,32,40,117,99,98,46
167	.byte	101,108,101,102,117,110,116,41,32,57
168	.byte	47,49,57,47,56,53,0
169L29:	.blkb	4
170	.double	1204
171	.double	3062
172	.double	5746
173	.double	9193
174	.double	13348
175	.double	18162
176	.double	23592
177	.double	29598
178	.double	36145
179	.double	43202
180	.double	50740
181	.double	58733
182	.double	67158
183	.double	75992
184	.double	85215
185	.double	83599
186	.double	71378
187	.double	60428
188	.double	50647
189	.double	41945
190	.double	34246
191	.double	27478
192	.double	21581
193	.double	16499
194	.double	12183
195	.double	8588
196	.double	5674
197	.double	3403
198	.double	1742
199	.double	661
200	.double	130
201L30:	.blkb	4
202	.double	1129316352 	;L30:	.double 0,0x43500000
203L31:	.blkb	4
204	.double 0x1ff00000
205L32:	.blkb	4
206	.double 0x5ff00000
207