1; 2; Copyright (c) 1985 Regents of the University of California. 3; All rights reserved. 4; 5; Redistribution and use in source and binary forms are permitted 6; provided that this notice is preserved and that due credit is given 7; to the University of California at Berkeley. The name of the University 8; may not be used to endorse or promote products derived from this 9; software without specific prior written permission. This software 10; is provided ``as is'' without express or implied warranty. 11; 12; All recipients should regard themselves as participants in an ongoing 13; research project and hence should feel obligated to report their 14; experiences (good or bad) with these elementary function codes, using 15; the sendbug(8) program, to the authors. 16; 17; @(#)sqrt.s 5.2 (Berkeley) 04/29/88 18 19; double sqrt(x) 20; double x; 21; IEEE double precision sqrt 22; code in NSC assembly by K.C. Ng 23; 12/13/85 24; 25; Method: 26; Use Kahan's trick to get 8 bits initial approximation 27; by integer shift and add/subtract. Then three Newton 28; iterations to get down error to within one ulp. Finally 29; twiddle the last bit to make to correctly rounded 30; according to the rounding mode. 31; 32 .vers 2 33 .text 34 .align 2 35 .globl _sqrt 36_sqrt: 37 enter [r3,r4,r5,r6,r7],44 38 movl f4,tos 39 movl f6,tos 40 movd 2146435072,r2 ; r2 = 0x7ff00000 41 movl 8(fp),f0 ; f2 = x 42 movd 12(fp),r3 ; r3 = high part of x 43 movd r3,r4 ; make a copy of high part of x in r4 44 andd r2,r3 ; r3 become the bias exponent of x 45 cmpd r2,r3 ; if r3 = 0x7ff00000 then x is INF or NAN 46 bne L22 47 ; to see if x is INF 48 movd 8(fp),r0 ; r0 = low part of x 49 movd r4,r1 ; r1 is high part of x again 50 andd 0xfff00000,r1 ; mask off the sign and exponent of x 51 ord r0,r1 ; or with low part, if 0 then x is INF 52 cmpqd 0,r1 ; 53 bne L1 ; not 0; therefore x is NaN; return x. 54 cmpqd 0,r4 ; now x is Inf, is it +inf? 55 blt L1 ; +INF, return x 56 ; -INF, return NaN by doing 0/0 57nan: movl 0f0.0,f0 ; 58 divl f0,f0 59 br L1 60L22: ; now x is finite 61 cmpl 0f0.0,f0 ; x = 0 ? 62 beq L1 ; return x if x is +0 or -0 63 cmpqd 0,r4 ; Is x < 0 ? 64 bgt nan ; if x < 0 return NaN 65 movqd 0,r5 ; r5 == scalx initialize to zero 66 cmpqd 0,r3 ; is x is subnormal ? (r3 is the exponent) 67 bne L21 ; if x is normal, goto L21 68 movl L30,f2 ; f2 = 2**54 69 mull f2,f0 ; scale up x by 2**54 70 subd 0x1b00000,r5 ; off set the scale factor by -27 in exponent 71L21: 72 ; now x is normal 73 ; notations: 74 ; r1 == copy of fsr 75 ; r2 == mask of e inexact enable flag 76 ; r3 == mask of i inexact flag 77 ; r4 == mask of r rounding mode 78 ; r5 == x's scale factor (already defined) 79 80 movd 0x20,r2 81 movd 0x40,r3 82 movd 0x180,r4 83 sfsr r0 ; store fsr to r0 84 movd r0,r1 ; make a copy of fsr to r1 85 bicd [5,6,7,8],r0 ; clear e,i, and set r to round to nearest 86 lfsr r0 87 88 ; begin to compute sqrt(x) 89 movl f0,-8(fp) 90 movd -4(fp),r0 ; r0 the high part of modified x 91 lshd -1,r0 ; r0 >> 1 92 addd 0x1ff80000,r0 ; add correction to r0 ...got 5 bits approx. 93 movd r0,r6 94 lshd -13,r6 ; r6 = r0>>-15 95 andd 0x7c,r6 ; obtain 4*leading 5 bits of r0 96 addrd L29,r7 ; r7 = address of L29 = table[0] 97 addd r6,r7 ; r6 = address of L29[r6] = table[r6] 98 subd 0(r7),r0 ; r0 = r0 - table[r6] 99 movd r0,-4(fp) 100 movl -8(fp),f2 ; now f2 = y approximate sqrt(f0) to 8 bits 101 102 movl 0f0.5,f6 ; f6 = 0.5 103 movl f0,f4 104 divl f2,f4 ; t = x/y 105 addl f4,f2 ; y = y + x/y 106 mull f6,f2 ; y = 0.5(y+x/y) got 17 bits approx. 107 movl f0,f4 108 divl f2,f4 ; t = x/y 109 addl f4,f2 ; y = y + x/y 110 mull f6,f2 ; y = 0.5(y+x/y) got 35 bits approx. 111 movl f0,f4 112 divl f2,f4 ; t = x/y 113 subl f2,f4 ; t = x/y - y 114 mull f6,f4 ; t = 0.5(x/y-y) 115 addl f4,f2 ; y = y + 0.5(x/y -y) 116 ; now y approx. sqrt(x) to within 1 ulp 117 118 ; twiddle last bit to force y correctly rounded 119 movd r1,r0 ; restore the old fsr 120 bicd [6,7,8],r0 ; clear inexact bit but retain inexact enable 121 ord 0x80,r0 ; set rounding mode to round to zero 122 lfsr r0 123 124 movl f0,f4 125 divl f2,f4 ; f4 = x/y 126 sfsr r0 127 andd r3,r0 ; get the inexact flag 128 cmpqd 0,r0 129 bne L18 130 ; if x/y exact, then ... 131 cmpl f2,f4 ; if y == x/y 132 beq L2 133 movl f4,-8(fp) 134 subd 1,-8(fp) 135 subcd 0,-4(fp) 136 movl -8(fp),f4 ; f4 = f4 - ulp 137L18: 138 bicd [6],r1 139 ord r3,r1 ; set inexact flag in r1 140 141 andd r1,r4 ; r4 = the old rounding mode 142 cmpqd 0,r4 ; round to nearest? 143 bne L17 144 movl f4,-8(fp) 145 addd 1,-8(fp) 146 addcd 0,-4(fp) 147 movl -8(fp),f4 ; f4 = f4 + ulp 148 br L16 149L17: 150 cmpd 0x100,r4 ; round to positive inf ? 151 bne L16 152 movl f4,-8(fp) 153 addd 1,-8(fp) 154 addcd 0,-4(fp) 155 movl -8(fp),f4 ; f4 = f4 + ulp 156 157 movl f2,-8(fp) 158 addd 1,-8(fp) 159 addcd 0,-4(fp) 160 movl -8(fp),f2 ; f2 = f2 + ulp 161L16: 162 addl f4,f2 ; y = y + t 163 subd 0x100000,r5 ; scalx = scalx - 1 164L2: 165 movl f2,-8(fp) 166 addd r5,-4(fp) 167 movl -8(fp),f0 168 lfsr r1 169L1: 170 movl tos,f6 171 movl tos,f4 172 exit [r3,r4,r5,r6,r7] 173 ret 0 174 .data 175L28: .byte 64,40,35,41,115,113,114,116,46,99 176 .byte 9,49,46,49,32,40,117,99,98,46 177 .byte 101,108,101,102,117,110,116,41,32,57 178 .byte 47,49,57,47,56,53,0 179L29: .blkb 4 180 .double 1204 181 .double 3062 182 .double 5746 183 .double 9193 184 .double 13348 185 .double 18162 186 .double 23592 187 .double 29598 188 .double 36145 189 .double 43202 190 .double 50740 191 .double 58733 192 .double 67158 193 .double 75992 194 .double 85215 195 .double 83599 196 .double 71378 197 .double 60428 198 .double 50647 199 .double 41945 200 .double 34246 201 .double 27478 202 .double 21581 203 .double 16499 204 .double 12183 205 .double 8588 206 .double 5674 207 .double 3403 208 .double 1742 209 .double 661 210 .double 130 211L30: .blkb 4 212 .double 1129316352 ;L30: .double 0,0x43500000 213L31: .blkb 4 214 .double 0x1ff00000 215L32: .blkb 4 216 .double 0x5ff00000 217