xref: /original-bsd/lib/libm/national/sqrt.s (revision 241757c4)
1;
2; Copyright (c) 1985 Regents of the University of California.
3; All rights reserved.
4;
5; Redistribution and use in source and binary forms are permitted
6; provided that this notice is preserved and that due credit is given
7; to the University of California at Berkeley. The name of the University
8; may not be used to endorse or promote products derived from this
9; software without specific prior written permission. This software
10; is provided ``as is'' without express or implied warranty.
11;
12; All recipients should regard themselves as participants in an ongoing
13; research project and hence should feel obligated to report their
14; experiences (good or bad) with these elementary function codes, using
15; the sendbug(8) program, to the authors.
16;
17;	@(#)sqrt.s	5.2 (Berkeley) 04/29/88
18
19; double sqrt(x)
20; double x;
21; IEEE double precision sqrt
22; code in NSC assembly by K.C. Ng
23; 12/13/85
24;
25; Method:
26; 	Use Kahan's trick to get 8 bits initial approximation
27;	by integer shift and add/subtract. Then three Newton
28;	iterations to get down error to within one ulp. Finally
29;	twiddle the last bit to make to correctly rounded
30;	according to the rounding mode.
31;
32	.vers	2
33	.text
34	.align	2
35	.globl	_sqrt
36_sqrt:
37	enter	[r3,r4,r5,r6,r7],44
38	movl	f4,tos
39	movl	f6,tos
40	movd	2146435072,r2	; r2 = 0x7ff00000
41	movl	8(fp),f0	; f2 = x
42	movd	12(fp),r3	; r3 = high part of x
43	movd	r3,r4		; make a copy of high part of x in r4
44	andd	r2,r3		; r3 become the bias exponent of x
45	cmpd	r2,r3		; if r3 = 0x7ff00000 then x is INF or NAN
46	bne	L22
47				; to see if x is INF
48	movd	8(fp),r0	; r0 = low part of x
49	movd	r4,r1		; r1 is high part of x again
50	andd	0xfff00000,r1	; mask off the sign and exponent of x
51	ord	r0,r1		; or with low part, if 0 then x is INF
52	cmpqd	0,r1		;
53	bne	L1		; not 0; therefore x is NaN; return x.
54	cmpqd	0,r4		; now x is Inf, is it +inf?
55	blt	L1		; +INF, return x
56				; -INF, return NaN by doing 0/0
57nan:	movl	0f0.0,f0	;
58	divl	f0,f0
59	br	L1
60L22:				; now x is finite
61	cmpl	0f0.0,f0	; x = 0 ?
62	beq	L1		; return x if x is +0 or -0
63	cmpqd	0,r4		; Is x < 0 ?
64	bgt	nan		; if x < 0 return NaN
65	movqd	0,r5		; r5 == scalx initialize to zero
66	cmpqd	0,r3		; is x is subnormal ?  (r3 is the exponent)
67	bne	L21		; if x is normal, goto L21
68	movl	L30,f2		; f2 = 2**54
69	mull	f2,f0		; scale up x by 2**54
70	subd	0x1b00000,r5	; off set the scale factor by -27 in exponent
71L21:
72				; now x is normal
73				; notations:
74				;    r1 == copy of fsr
75				;    r2 == mask of e inexact enable flag
76				;    r3 == mask of i inexact flag
77				;    r4 == mask of r rounding mode
78				;    r5 == x's scale factor (already defined)
79
80	movd	0x20,r2
81	movd	0x40,r3
82	movd	0x180,r4
83	sfsr	r0		; store fsr to r0
84	movd	r0,r1		; make a copy of fsr to r1
85	bicd	[5,6,7,8],r0	; clear e,i, and set r to round to nearest
86	lfsr	r0
87
88				; begin to compute sqrt(x)
89	movl	f0,-8(fp)
90	movd	-4(fp),r0	; r0 the high part of modified x
91	lshd	-1,r0		; r0 >> 1
92	addd	0x1ff80000,r0	; add correction to r0  ...got 5 bits approx.
93	movd	r0,r6
94	lshd	-13,r6		; r6 = r0>>-15
95	andd	0x7c,r6		; obtain 4*leading 5 bits of r0
96	addrd	L29,r7		; r7 = address of L29 = table[0]
97	addd	r6,r7		; r6 = address of L29[r6] = table[r6]
98	subd	0(r7),r0	; r0 = r0 - table[r6]
99	movd	r0,-4(fp)
100	movl	-8(fp),f2	; now f2 = y approximate sqrt(f0) to 8 bits
101
102	movl	0f0.5,f6	; f6 = 0.5
103	movl	f0,f4
104	divl	f2,f4		; t = x/y
105	addl	f4,f2		; y = y + x/y
106	mull	f6,f2		; y = 0.5(y+x/y) got 17 bits approx.
107	movl	f0,f4
108	divl	f2,f4		; t = x/y
109	addl	f4,f2		; y = y + x/y
110	mull	f6,f2		; y = 0.5(y+x/y) got 35 bits approx.
111	movl	f0,f4
112	divl	f2,f4		; t = x/y
113	subl	f2,f4		; t = x/y - y
114	mull	f6,f4		; t = 0.5(x/y-y)
115	addl	f4,f2		; y = y + 0.5(x/y -y)
116				; now y approx. sqrt(x) to within 1 ulp
117
118				; twiddle last bit to force y correctly rounded
119	movd	r1,r0		; restore the old fsr
120	bicd	[6,7,8],r0	; clear inexact bit but retain inexact enable
121	ord	0x80,r0		; set rounding mode to round to zero
122	lfsr	r0
123
124	movl	f0,f4
125	divl	f2,f4		; f4 = x/y
126	sfsr	r0
127	andd	r3,r0		; get the inexact flag
128	cmpqd	0,r0
129	bne	L18
130				; if x/y exact, then ...
131	cmpl	f2,f4		; if y == x/y
132	beq	L2
133	movl	f4,-8(fp)
134	subd	1,-8(fp)
135	subcd	0,-4(fp)
136	movl	-8(fp),f4	; f4 = f4 - ulp
137L18:
138	bicd	[6],r1
139	ord	r3,r1		; set inexact flag in r1
140
141	andd	r1,r4		; r4 = the old rounding mode
142	cmpqd	0,r4		; round to nearest?
143	bne	L17
144	movl	f4,-8(fp)
145	addd	1,-8(fp)
146	addcd	0,-4(fp)
147	movl	-8(fp),f4	; f4 = f4 + ulp
148	br	L16
149L17:
150	cmpd	0x100,r4	; round to positive inf ?
151	bne	L16
152	movl	f4,-8(fp)
153	addd	1,-8(fp)
154	addcd	0,-4(fp)
155	movl	-8(fp),f4	; f4 = f4 + ulp
156
157	movl	f2,-8(fp)
158	addd	1,-8(fp)
159	addcd	0,-4(fp)
160	movl	-8(fp),f2	; f2 = f2 + ulp
161L16:
162	addl	f4,f2		; y  = y + t
163	subd	0x100000,r5	; scalx = scalx - 1
164L2:
165	movl	f2,-8(fp)
166	addd	r5,-4(fp)
167	movl	-8(fp),f0
168	lfsr	r1
169L1:
170	movl	tos,f6
171	movl	tos,f4
172	exit	[r3,r4,r5,r6,r7]
173	ret	0
174	.data
175L28:	.byte	64,40,35,41,115,113,114,116,46,99
176	.byte	9,49,46,49,32,40,117,99,98,46
177	.byte	101,108,101,102,117,110,116,41,32,57
178	.byte	47,49,57,47,56,53,0
179L29:	.blkb	4
180	.double	1204
181	.double	3062
182	.double	5746
183	.double	9193
184	.double	13348
185	.double	18162
186	.double	23592
187	.double	29598
188	.double	36145
189	.double	43202
190	.double	50740
191	.double	58733
192	.double	67158
193	.double	75992
194	.double	85215
195	.double	83599
196	.double	71378
197	.double	60428
198	.double	50647
199	.double	41945
200	.double	34246
201	.double	27478
202	.double	21581
203	.double	16499
204	.double	12183
205	.double	8588
206	.double	5674
207	.double	3403
208	.double	1742
209	.double	661
210	.double	130
211L30:	.blkb	4
212	.double	1129316352 	;L30:	.double 0,0x43500000
213L31:	.blkb	4
214	.double 0x1ff00000
215L32:	.blkb	4
216	.double 0x5ff00000
217