1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// This file provides Go implementations of elementary multi-precision 6// arithmetic operations on word vectors. Needed for platforms without 7// assembly implementations of these routines. 8 9package big 10 11// A Word represents a single digit of a multi-precision unsigned integer. 12type Word uintptr 13 14const ( 15 // Compute the size _S of a Word in bytes. 16 _m = ^Word(0) 17 _logS = _m>>8&1 + _m>>16&1 + _m>>32&1 18 _S = 1 << _logS 19 20 _W = _S << 3 // word size in bits 21 _B = 1 << _W // digit base 22 _M = _B - 1 // digit mask 23 24 _W2 = _W / 2 // half word size in bits 25 _B2 = 1 << _W2 // half digit base 26 _M2 = _B2 - 1 // half digit mask 27) 28 29// ---------------------------------------------------------------------------- 30// Elementary operations on words 31// 32// These operations are used by the vector operations below. 33 34// z1<<_W + z0 = x+y+c, with c == 0 or 1 35func addWW_g(x, y, c Word) (z1, z0 Word) { 36 yc := y + c 37 z0 = x + yc 38 if z0 < x || yc < y { 39 z1 = 1 40 } 41 return 42} 43 44// z1<<_W + z0 = x-y-c, with c == 0 or 1 45func subWW_g(x, y, c Word) (z1, z0 Word) { 46 yc := y + c 47 z0 = x - yc 48 if z0 > x || yc < y { 49 z1 = 1 50 } 51 return 52} 53 54// z1<<_W + z0 = x*y 55// Adapted from Warren, Hacker's Delight, p. 132. 56func mulWW_g(x, y Word) (z1, z0 Word) { 57 x0 := x & _M2 58 x1 := x >> _W2 59 y0 := y & _M2 60 y1 := y >> _W2 61 w0 := x0 * y0 62 t := x1*y0 + w0>>_W2 63 w1 := t & _M2 64 w2 := t >> _W2 65 w1 += x0 * y1 66 z1 = x1*y1 + w2 + w1>>_W2 67 z0 = x * y 68 return 69} 70 71// z1<<_W + z0 = x*y + c 72func mulAddWWW_g(x, y, c Word) (z1, z0 Word) { 73 z1, zz0 := mulWW_g(x, y) 74 if z0 = zz0 + c; z0 < zz0 { 75 z1++ 76 } 77 return 78} 79 80// Length of x in bits. 81func bitLen_g(x Word) (n int) { 82 for ; x >= 0x8000; x >>= 16 { 83 n += 16 84 } 85 if x >= 0x80 { 86 x >>= 8 87 n += 8 88 } 89 if x >= 0x8 { 90 x >>= 4 91 n += 4 92 } 93 if x >= 0x2 { 94 x >>= 2 95 n += 2 96 } 97 if x >= 0x1 { 98 n++ 99 } 100 return 101} 102 103// log2 computes the integer binary logarithm of x. 104// The result is the integer n for which 2^n <= x < 2^(n+1). 105// If x == 0, the result is -1. 106func log2(x Word) int { 107 return bitLen(x) - 1 108} 109 110// nlz returns the number of leading zeros in x. 111func nlz(x Word) uint { 112 return uint(_W - bitLen(x)) 113} 114 115// nlz64 returns the number of leading zeros in x. 116func nlz64(x uint64) uint { 117 switch _W { 118 case 32: 119 w := x >> 32 120 if w == 0 { 121 return 32 + nlz(Word(x)) 122 } 123 return nlz(Word(w)) 124 case 64: 125 return nlz(Word(x)) 126 } 127 panic("unreachable") 128} 129 130// q = (u1<<_W + u0 - r)/y 131// Adapted from Warren, Hacker's Delight, p. 152. 132func divWW_g(u1, u0, v Word) (q, r Word) { 133 if u1 >= v { 134 return 1<<_W - 1, 1<<_W - 1 135 } 136 137 s := nlz(v) 138 v <<= s 139 140 vn1 := v >> _W2 141 vn0 := v & _M2 142 un32 := u1<<s | u0>>(_W-s) 143 un10 := u0 << s 144 un1 := un10 >> _W2 145 un0 := un10 & _M2 146 q1 := un32 / vn1 147 rhat := un32 - q1*vn1 148 149 for q1 >= _B2 || q1*vn0 > _B2*rhat+un1 { 150 q1-- 151 rhat += vn1 152 if rhat >= _B2 { 153 break 154 } 155 } 156 157 un21 := un32*_B2 + un1 - q1*v 158 q0 := un21 / vn1 159 rhat = un21 - q0*vn1 160 161 for q0 >= _B2 || q0*vn0 > _B2*rhat+un0 { 162 q0-- 163 rhat += vn1 164 if rhat >= _B2 { 165 break 166 } 167 } 168 169 return q1*_B2 + q0, (un21*_B2 + un0 - q0*v) >> s 170} 171 172// Keep for performance debugging. 173// Using addWW_g is likely slower. 174const use_addWW_g = false 175 176// The resulting carry c is either 0 or 1. 177func addVV_g(z, x, y []Word) (c Word) { 178 if use_addWW_g { 179 for i := range z { 180 c, z[i] = addWW_g(x[i], y[i], c) 181 } 182 return 183 } 184 185 for i, xi := range x[:len(z)] { 186 yi := y[i] 187 zi := xi + yi + c 188 z[i] = zi 189 // see "Hacker's Delight", section 2-12 (overflow detection) 190 c = (xi&yi | (xi|yi)&^zi) >> (_W - 1) 191 } 192 return 193} 194 195// The resulting carry c is either 0 or 1. 196func subVV_g(z, x, y []Word) (c Word) { 197 if use_addWW_g { 198 for i := range z { 199 c, z[i] = subWW_g(x[i], y[i], c) 200 } 201 return 202 } 203 204 for i, xi := range x[:len(z)] { 205 yi := y[i] 206 zi := xi - yi - c 207 z[i] = zi 208 // see "Hacker's Delight", section 2-12 (overflow detection) 209 c = (yi&^xi | (yi|^xi)&zi) >> (_W - 1) 210 } 211 return 212} 213 214// The resulting carry c is either 0 or 1. 215func addVW_g(z, x []Word, y Word) (c Word) { 216 if use_addWW_g { 217 c = y 218 for i := range z { 219 c, z[i] = addWW_g(x[i], c, 0) 220 } 221 return 222 } 223 224 c = y 225 for i, xi := range x[:len(z)] { 226 zi := xi + c 227 z[i] = zi 228 c = xi &^ zi >> (_W - 1) 229 } 230 return 231} 232 233func subVW_g(z, x []Word, y Word) (c Word) { 234 if use_addWW_g { 235 c = y 236 for i := range z { 237 c, z[i] = subWW_g(x[i], c, 0) 238 } 239 return 240 } 241 242 c = y 243 for i, xi := range x[:len(z)] { 244 zi := xi - c 245 z[i] = zi 246 c = (zi &^ xi) >> (_W - 1) 247 } 248 return 249} 250 251func shlVU_g(z, x []Word, s uint) (c Word) { 252 if n := len(z); n > 0 { 253 ŝ := _W - s 254 w1 := x[n-1] 255 c = w1 >> ŝ 256 for i := n - 1; i > 0; i-- { 257 w := w1 258 w1 = x[i-1] 259 z[i] = w<<s | w1>>ŝ 260 } 261 z[0] = w1 << s 262 } 263 return 264} 265 266func shrVU_g(z, x []Word, s uint) (c Word) { 267 if n := len(z); n > 0 { 268 ŝ := _W - s 269 w1 := x[0] 270 c = w1 << ŝ 271 for i := 0; i < n-1; i++ { 272 w := w1 273 w1 = x[i+1] 274 z[i] = w>>s | w1<<ŝ 275 } 276 z[n-1] = w1 >> s 277 } 278 return 279} 280 281func mulAddVWW_g(z, x []Word, y, r Word) (c Word) { 282 c = r 283 for i := range z { 284 c, z[i] = mulAddWWW_g(x[i], y, c) 285 } 286 return 287} 288 289// TODO(gri) Remove use of addWW_g here and then we can remove addWW_g and subWW_g. 290func addMulVVW_g(z, x []Word, y Word) (c Word) { 291 for i := range z { 292 z1, z0 := mulAddWWW_g(x[i], y, z[i]) 293 c, z[i] = addWW_g(z0, c, 0) 294 c += z1 295 } 296 return 297} 298 299func divWVW_g(z []Word, xn Word, x []Word, y Word) (r Word) { 300 r = xn 301 for i := len(z) - 1; i >= 0; i-- { 302 z[i], r = divWW_g(r, x[i], y) 303 } 304 return 305} 306