1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// This file provides Go implementations of elementary multi-precision 6// arithmetic operations on word vectors. These have the suffix _g. 7// These are needed for platforms without assembly implementations of these routines. 8// This file also contains elementary operations that can be implemented 9// sufficiently efficiently in Go. 10 11package big 12 13import "math/bits" 14 15// A Word represents a single digit of a multi-precision unsigned integer. 16type Word uint 17 18const ( 19 _S = _W / 8 // word size in bytes 20 21 _W = bits.UintSize // word size in bits 22 _B = 1 << _W // digit base 23 _M = _B - 1 // digit mask 24) 25 26// Many of the loops in this file are of the form 27// for i := 0; i < len(z) && i < len(x) && i < len(y); i++ 28// i < len(z) is the real condition. 29// However, checking i < len(x) && i < len(y) as well is faster than 30// having the compiler do a bounds check in the body of the loop; 31// remarkably it is even faster than hoisting the bounds check 32// out of the loop, by doing something like 33// _, _ = x[len(z)-1], y[len(z)-1] 34// There are other ways to hoist the bounds check out of the loop, 35// but the compiler's BCE isn't powerful enough for them (yet?). 36// See the discussion in CL 164966. 37 38// ---------------------------------------------------------------------------- 39// Elementary operations on words 40// 41// These operations are used by the vector operations below. 42 43// z1<<_W + z0 = x*y 44func mulWW_g(x, y Word) (z1, z0 Word) { 45 hi, lo := bits.Mul(uint(x), uint(y)) 46 return Word(hi), Word(lo) 47} 48 49// z1<<_W + z0 = x*y + c 50func mulAddWWW_g(x, y, c Word) (z1, z0 Word) { 51 hi, lo := bits.Mul(uint(x), uint(y)) 52 var cc uint 53 lo, cc = bits.Add(lo, uint(c), 0) 54 return Word(hi + cc), Word(lo) 55} 56 57// nlz returns the number of leading zeros in x. 58// Wraps bits.LeadingZeros call for convenience. 59func nlz(x Word) uint { 60 return uint(bits.LeadingZeros(uint(x))) 61} 62 63// q = (u1<<_W + u0 - r)/v 64func divWW_g(u1, u0, v Word) (q, r Word) { 65 qq, rr := bits.Div(uint(u1), uint(u0), uint(v)) 66 return Word(qq), Word(rr) 67} 68 69// The resulting carry c is either 0 or 1. 70func addVV_g(z, x, y []Word) (c Word) { 71 // The comment near the top of this file discusses this for loop condition. 72 for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { 73 zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c)) 74 z[i] = Word(zi) 75 c = Word(cc) 76 } 77 return 78} 79 80// The resulting carry c is either 0 or 1. 81func subVV_g(z, x, y []Word) (c Word) { 82 // The comment near the top of this file discusses this for loop condition. 83 for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { 84 zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c)) 85 z[i] = Word(zi) 86 c = Word(cc) 87 } 88 return 89} 90 91// The resulting carry c is either 0 or 1. 92func addVW_g(z, x []Word, y Word) (c Word) { 93 c = y 94 // The comment near the top of this file discusses this for loop condition. 95 for i := 0; i < len(z) && i < len(x); i++ { 96 zi, cc := bits.Add(uint(x[i]), uint(c), 0) 97 z[i] = Word(zi) 98 c = Word(cc) 99 } 100 return 101} 102 103// addVWlarge is addVW, but intended for large z. 104// The only difference is that we check on every iteration 105// whether we are done with carries, 106// and if so, switch to a much faster copy instead. 107// This is only a good idea for large z, 108// because the overhead of the check and the function call 109// outweigh the benefits when z is small. 110func addVWlarge(z, x []Word, y Word) (c Word) { 111 c = y 112 // The comment near the top of this file discusses this for loop condition. 113 for i := 0; i < len(z) && i < len(x); i++ { 114 if c == 0 { 115 copy(z[i:], x[i:]) 116 return 117 } 118 zi, cc := bits.Add(uint(x[i]), uint(c), 0) 119 z[i] = Word(zi) 120 c = Word(cc) 121 } 122 return 123} 124 125func subVW_g(z, x []Word, y Word) (c Word) { 126 c = y 127 // The comment near the top of this file discusses this for loop condition. 128 for i := 0; i < len(z) && i < len(x); i++ { 129 zi, cc := bits.Sub(uint(x[i]), uint(c), 0) 130 z[i] = Word(zi) 131 c = Word(cc) 132 } 133 return 134} 135 136// subVWlarge is to subVW as addVWlarge is to addVW. 137func subVWlarge(z, x []Word, y Word) (c Word) { 138 c = y 139 // The comment near the top of this file discusses this for loop condition. 140 for i := 0; i < len(z) && i < len(x); i++ { 141 if c == 0 { 142 copy(z[i:], x[i:]) 143 return 144 } 145 zi, cc := bits.Sub(uint(x[i]), uint(c), 0) 146 z[i] = Word(zi) 147 c = Word(cc) 148 } 149 return 150} 151 152func shlVU_g(z, x []Word, s uint) (c Word) { 153 if s == 0 { 154 copy(z, x) 155 return 156 } 157 if len(z) == 0 { 158 return 159 } 160 s &= _W - 1 // hint to the compiler that shifts by s don't need guard code 161 ŝ := _W - s 162 ŝ &= _W - 1 // ditto 163 c = x[len(z)-1] >> ŝ 164 for i := len(z) - 1; i > 0; i-- { 165 z[i] = x[i]<<s | x[i-1]>>ŝ 166 } 167 z[0] = x[0] << s 168 return 169} 170 171func shrVU_g(z, x []Word, s uint) (c Word) { 172 if s == 0 { 173 copy(z, x) 174 return 175 } 176 if len(z) == 0 { 177 return 178 } 179 s &= _W - 1 // hint to the compiler that shifts by s don't need guard code 180 ŝ := _W - s 181 ŝ &= _W - 1 // ditto 182 c = x[0] << ŝ 183 for i := 0; i < len(z)-1; i++ { 184 z[i] = x[i]>>s | x[i+1]<<ŝ 185 } 186 z[len(z)-1] = x[len(z)-1] >> s 187 return 188} 189 190func mulAddVWW_g(z, x []Word, y, r Word) (c Word) { 191 c = r 192 // The comment near the top of this file discusses this for loop condition. 193 for i := 0; i < len(z) && i < len(x); i++ { 194 c, z[i] = mulAddWWW_g(x[i], y, c) 195 } 196 return 197} 198 199func addMulVVW_g(z, x []Word, y Word) (c Word) { 200 // The comment near the top of this file discusses this for loop condition. 201 for i := 0; i < len(z) && i < len(x); i++ { 202 z1, z0 := mulAddWWW_g(x[i], y, z[i]) 203 lo, cc := bits.Add(uint(z0), uint(c), 0) 204 c, z[i] = Word(cc), Word(lo) 205 c += z1 206 } 207 return 208} 209 210func divWVW_g(z []Word, xn Word, x []Word, y Word) (r Word) { 211 r = xn 212 for i := len(z) - 1; i >= 0; i-- { 213 z[i], r = divWW_g(r, x[i], y) 214 } 215 return 216} 217