1; Copyright (c) 1985, 1993 2; The Regents of the University of California. All rights reserved. 3; 4; %sccs.include.redist.semicolon% 5; 6; @(#)sqrt.s 8.1 (Berkeley) 06/04/93 7; 8 9; double sqrt(x) 10; double x; 11; IEEE double precision sqrt 12; code in NSC assembly by K.C. Ng 13; 12/13/85 14; 15; Method: 16; Use Kahan's trick to get 8 bits initial approximation 17; by integer shift and add/subtract. Then three Newton 18; iterations to get down error to within one ulp. Finally 19; twiddle the last bit to make to correctly rounded 20; according to the rounding mode. 21; 22 .vers 2 23 .text 24 .align 2 25 .globl _sqrt 26_sqrt: 27 enter [r3,r4,r5,r6,r7],44 28 movl f4,tos 29 movl f6,tos 30 movd 2146435072,r2 ; r2 = 0x7ff00000 31 movl 8(fp),f0 ; f2 = x 32 movd 12(fp),r3 ; r3 = high part of x 33 movd r3,r4 ; make a copy of high part of x in r4 34 andd r2,r3 ; r3 become the bias exponent of x 35 cmpd r2,r3 ; if r3 = 0x7ff00000 then x is INF or NAN 36 bne L22 37 ; to see if x is INF 38 movd 8(fp),r0 ; r0 = low part of x 39 movd r4,r1 ; r1 is high part of x again 40 andd 0xfff00000,r1 ; mask off the sign and exponent of x 41 ord r0,r1 ; or with low part, if 0 then x is INF 42 cmpqd 0,r1 ; 43 bne L1 ; not 0; therefore x is NaN; return x. 44 cmpqd 0,r4 ; now x is Inf, is it +inf? 45 blt L1 ; +INF, return x 46 ; -INF, return NaN by doing 0/0 47nan: movl 0f0.0,f0 ; 48 divl f0,f0 49 br L1 50L22: ; now x is finite 51 cmpl 0f0.0,f0 ; x = 0 ? 52 beq L1 ; return x if x is +0 or -0 53 cmpqd 0,r4 ; Is x < 0 ? 54 bgt nan ; if x < 0 return NaN 55 movqd 0,r5 ; r5 == scalx initialize to zero 56 cmpqd 0,r3 ; is x is subnormal ? (r3 is the exponent) 57 bne L21 ; if x is normal, goto L21 58 movl L30,f2 ; f2 = 2**54 59 mull f2,f0 ; scale up x by 2**54 60 subd 0x1b00000,r5 ; off set the scale factor by -27 in exponent 61L21: 62 ; now x is normal 63 ; notations: 64 ; r1 == copy of fsr 65 ; r2 == mask of e inexact enable flag 66 ; r3 == mask of i inexact flag 67 ; r4 == mask of r rounding mode 68 ; r5 == x's scale factor (already defined) 69 70 movd 0x20,r2 71 movd 0x40,r3 72 movd 0x180,r4 73 sfsr r0 ; store fsr to r0 74 movd r0,r1 ; make a copy of fsr to r1 75 bicd [5,6,7,8],r0 ; clear e,i, and set r to round to nearest 76 lfsr r0 77 78 ; begin to compute sqrt(x) 79 movl f0,-8(fp) 80 movd -4(fp),r0 ; r0 the high part of modified x 81 lshd -1,r0 ; r0 >> 1 82 addd 0x1ff80000,r0 ; add correction to r0 ...got 5 bits approx. 83 movd r0,r6 84 lshd -13,r6 ; r6 = r0>>-15 85 andd 0x7c,r6 ; obtain 4*leading 5 bits of r0 86 addrd L29,r7 ; r7 = address of L29 = table[0] 87 addd r6,r7 ; r6 = address of L29[r6] = table[r6] 88 subd 0(r7),r0 ; r0 = r0 - table[r6] 89 movd r0,-4(fp) 90 movl -8(fp),f2 ; now f2 = y approximate sqrt(f0) to 8 bits 91 92 movl 0f0.5,f6 ; f6 = 0.5 93 movl f0,f4 94 divl f2,f4 ; t = x/y 95 addl f4,f2 ; y = y + x/y 96 mull f6,f2 ; y = 0.5(y+x/y) got 17 bits approx. 97 movl f0,f4 98 divl f2,f4 ; t = x/y 99 addl f4,f2 ; y = y + x/y 100 mull f6,f2 ; y = 0.5(y+x/y) got 35 bits approx. 101 movl f0,f4 102 divl f2,f4 ; t = x/y 103 subl f2,f4 ; t = x/y - y 104 mull f6,f4 ; t = 0.5(x/y-y) 105 addl f4,f2 ; y = y + 0.5(x/y -y) 106 ; now y approx. sqrt(x) to within 1 ulp 107 108 ; twiddle last bit to force y correctly rounded 109 movd r1,r0 ; restore the old fsr 110 bicd [6,7,8],r0 ; clear inexact bit but retain inexact enable 111 ord 0x80,r0 ; set rounding mode to round to zero 112 lfsr r0 113 114 movl f0,f4 115 divl f2,f4 ; f4 = x/y 116 sfsr r0 117 andd r3,r0 ; get the inexact flag 118 cmpqd 0,r0 119 bne L18 120 ; if x/y exact, then ... 121 cmpl f2,f4 ; if y == x/y 122 beq L2 123 movl f4,-8(fp) 124 subd 1,-8(fp) 125 subcd 0,-4(fp) 126 movl -8(fp),f4 ; f4 = f4 - ulp 127L18: 128 bicd [6],r1 129 ord r3,r1 ; set inexact flag in r1 130 131 andd r1,r4 ; r4 = the old rounding mode 132 cmpqd 0,r4 ; round to nearest? 133 bne L17 134 movl f4,-8(fp) 135 addd 1,-8(fp) 136 addcd 0,-4(fp) 137 movl -8(fp),f4 ; f4 = f4 + ulp 138 br L16 139L17: 140 cmpd 0x100,r4 ; round to positive inf ? 141 bne L16 142 movl f4,-8(fp) 143 addd 1,-8(fp) 144 addcd 0,-4(fp) 145 movl -8(fp),f4 ; f4 = f4 + ulp 146 147 movl f2,-8(fp) 148 addd 1,-8(fp) 149 addcd 0,-4(fp) 150 movl -8(fp),f2 ; f2 = f2 + ulp 151L16: 152 addl f4,f2 ; y = y + t 153 subd 0x100000,r5 ; scalx = scalx - 1 154L2: 155 movl f2,-8(fp) 156 addd r5,-4(fp) 157 movl -8(fp),f0 158 lfsr r1 159L1: 160 movl tos,f6 161 movl tos,f4 162 exit [r3,r4,r5,r6,r7] 163 ret 0 164 .data 165L28: .byte 64,40,35,41,115,113,114,116,46,99 166 .byte 9,49,46,49,32,40,117,99,98,46 167 .byte 101,108,101,102,117,110,116,41,32,57 168 .byte 47,49,57,47,56,53,0 169L29: .blkb 4 170 .double 1204 171 .double 3062 172 .double 5746 173 .double 9193 174 .double 13348 175 .double 18162 176 .double 23592 177 .double 29598 178 .double 36145 179 .double 43202 180 .double 50740 181 .double 58733 182 .double 67158 183 .double 75992 184 .double 85215 185 .double 83599 186 .double 71378 187 .double 60428 188 .double 50647 189 .double 41945 190 .double 34246 191 .double 27478 192 .double 21581 193 .double 16499 194 .double 12183 195 .double 8588 196 .double 5674 197 .double 3403 198 .double 1742 199 .double 661 200 .double 130 201L30: .blkb 4 202 .double 1129316352 ;L30: .double 0,0x43500000 203L31: .blkb 4 204 .double 0x1ff00000 205L32: .blkb 4 206 .double 0x5ff00000 207