1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// This file provides Go implementations of elementary multi-precision 6// arithmetic operations on word vectors. Needed for platforms without 7// assembly implementations of these routines. 8 9package big 10 11import "math/bits" 12 13// A Word represents a single digit of a multi-precision unsigned integer. 14type Word uint 15 16const ( 17 _S = _W / 8 // word size in bytes 18 19 _W = bits.UintSize // word size in bits 20 _B = 1 << _W // digit base 21 _M = _B - 1 // digit mask 22 23 _W2 = _W / 2 // half word size in bits 24 _B2 = 1 << _W2 // half digit base 25 _M2 = _B2 - 1 // half digit mask 26) 27 28// ---------------------------------------------------------------------------- 29// Elementary operations on words 30// 31// These operations are used by the vector operations below. 32 33// z1<<_W + z0 = x+y+c, with c == 0 or 1 34func addWW_g(x, y, c Word) (z1, z0 Word) { 35 yc := y + c 36 z0 = x + yc 37 if z0 < x || yc < y { 38 z1 = 1 39 } 40 return 41} 42 43// z1<<_W + z0 = x-y-c, with c == 0 or 1 44func subWW_g(x, y, c Word) (z1, z0 Word) { 45 yc := y + c 46 z0 = x - yc 47 if z0 > x || yc < y { 48 z1 = 1 49 } 50 return 51} 52 53// z1<<_W + z0 = x*y 54// Adapted from Warren, Hacker's Delight, p. 132. 55func mulWW_g(x, y Word) (z1, z0 Word) { 56 x0 := x & _M2 57 x1 := x >> _W2 58 y0 := y & _M2 59 y1 := y >> _W2 60 w0 := x0 * y0 61 t := x1*y0 + w0>>_W2 62 w1 := t & _M2 63 w2 := t >> _W2 64 w1 += x0 * y1 65 z1 = x1*y1 + w2 + w1>>_W2 66 z0 = x * y 67 return 68} 69 70// z1<<_W + z0 = x*y + c 71func mulAddWWW_g(x, y, c Word) (z1, z0 Word) { 72 z1, zz0 := mulWW_g(x, y) 73 if z0 = zz0 + c; z0 < zz0 { 74 z1++ 75 } 76 return 77} 78 79// nlz returns the number of leading zeros in x. 80// Wraps bits.LeadingZeros call for convenience. 81func nlz(x Word) uint { 82 return uint(bits.LeadingZeros(uint(x))) 83} 84 85// q = (u1<<_W + u0 - r)/y 86// Adapted from Warren, Hacker's Delight, p. 152. 87func divWW_g(u1, u0, v Word) (q, r Word) { 88 if u1 >= v { 89 return 1<<_W - 1, 1<<_W - 1 90 } 91 92 s := nlz(v) 93 v <<= s 94 95 vn1 := v >> _W2 96 vn0 := v & _M2 97 un32 := u1<<s | u0>>(_W-s) 98 un10 := u0 << s 99 un1 := un10 >> _W2 100 un0 := un10 & _M2 101 q1 := un32 / vn1 102 rhat := un32 - q1*vn1 103 104 for q1 >= _B2 || q1*vn0 > _B2*rhat+un1 { 105 q1-- 106 rhat += vn1 107 if rhat >= _B2 { 108 break 109 } 110 } 111 112 un21 := un32*_B2 + un1 - q1*v 113 q0 := un21 / vn1 114 rhat = un21 - q0*vn1 115 116 for q0 >= _B2 || q0*vn0 > _B2*rhat+un0 { 117 q0-- 118 rhat += vn1 119 if rhat >= _B2 { 120 break 121 } 122 } 123 124 return q1*_B2 + q0, (un21*_B2 + un0 - q0*v) >> s 125} 126 127// Keep for performance debugging. 128// Using addWW_g is likely slower. 129const use_addWW_g = false 130 131// The resulting carry c is either 0 or 1. 132func addVV_g(z, x, y []Word) (c Word) { 133 if use_addWW_g { 134 for i := range z { 135 c, z[i] = addWW_g(x[i], y[i], c) 136 } 137 return 138 } 139 140 for i, xi := range x[:len(z)] { 141 yi := y[i] 142 zi := xi + yi + c 143 z[i] = zi 144 // see "Hacker's Delight", section 2-12 (overflow detection) 145 c = (xi&yi | (xi|yi)&^zi) >> (_W - 1) 146 } 147 return 148} 149 150// The resulting carry c is either 0 or 1. 151func subVV_g(z, x, y []Word) (c Word) { 152 if use_addWW_g { 153 for i := range z { 154 c, z[i] = subWW_g(x[i], y[i], c) 155 } 156 return 157 } 158 159 for i, xi := range x[:len(z)] { 160 yi := y[i] 161 zi := xi - yi - c 162 z[i] = zi 163 // see "Hacker's Delight", section 2-12 (overflow detection) 164 c = (yi&^xi | (yi|^xi)&zi) >> (_W - 1) 165 } 166 return 167} 168 169// The resulting carry c is either 0 or 1. 170func addVW_g(z, x []Word, y Word) (c Word) { 171 if use_addWW_g { 172 c = y 173 for i := range z { 174 c, z[i] = addWW_g(x[i], c, 0) 175 } 176 return 177 } 178 179 c = y 180 for i, xi := range x[:len(z)] { 181 zi := xi + c 182 z[i] = zi 183 c = xi &^ zi >> (_W - 1) 184 } 185 return 186} 187 188func subVW_g(z, x []Word, y Word) (c Word) { 189 if use_addWW_g { 190 c = y 191 for i := range z { 192 c, z[i] = subWW_g(x[i], c, 0) 193 } 194 return 195 } 196 197 c = y 198 for i, xi := range x[:len(z)] { 199 zi := xi - c 200 z[i] = zi 201 c = (zi &^ xi) >> (_W - 1) 202 } 203 return 204} 205 206func shlVU_g(z, x []Word, s uint) (c Word) { 207 if n := len(z); n > 0 { 208 ŝ := _W - s 209 w1 := x[n-1] 210 c = w1 >> ŝ 211 for i := n - 1; i > 0; i-- { 212 w := w1 213 w1 = x[i-1] 214 z[i] = w<<s | w1>>ŝ 215 } 216 z[0] = w1 << s 217 } 218 return 219} 220 221func shrVU_g(z, x []Word, s uint) (c Word) { 222 if n := len(z); n > 0 { 223 ŝ := _W - s 224 w1 := x[0] 225 c = w1 << ŝ 226 for i := 0; i < n-1; i++ { 227 w := w1 228 w1 = x[i+1] 229 z[i] = w>>s | w1<<ŝ 230 } 231 z[n-1] = w1 >> s 232 } 233 return 234} 235 236func mulAddVWW_g(z, x []Word, y, r Word) (c Word) { 237 c = r 238 for i := range z { 239 c, z[i] = mulAddWWW_g(x[i], y, c) 240 } 241 return 242} 243 244// TODO(gri) Remove use of addWW_g here and then we can remove addWW_g and subWW_g. 245func addMulVVW_g(z, x []Word, y Word) (c Word) { 246 for i := range z { 247 z1, z0 := mulAddWWW_g(x[i], y, z[i]) 248 c, z[i] = addWW_g(z0, c, 0) 249 c += z1 250 } 251 return 252} 253 254func divWVW_g(z []Word, xn Word, x []Word, y Word) (r Word) { 255 r = xn 256 for i := len(z) - 1; i >= 0; i-- { 257 z[i], r = divWW_g(r, x[i], y) 258 } 259 return 260} 261