1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This file provides Go implementations of elementary multi-precision
6// arithmetic operations on word vectors. These have the suffix _g.
7// These are needed for platforms without assembly implementations of these routines.
8// This file also contains elementary operations that can be implemented
9// sufficiently efficiently in Go.
10
11package big
12
13import "math/bits"
14
15// A Word represents a single digit of a multi-precision unsigned integer.
16type Word uint
17
18const (
19	_S = _W / 8 // word size in bytes
20
21	_W = bits.UintSize // word size in bits
22	_B = 1 << _W       // digit base
23	_M = _B - 1        // digit mask
24)
25
26// Many of the loops in this file are of the form
27//   for i := 0; i < len(z) && i < len(x) && i < len(y); i++
28// i < len(z) is the real condition.
29// However, checking i < len(x) && i < len(y) as well is faster than
30// having the compiler do a bounds check in the body of the loop;
31// remarkably it is even faster than hoisting the bounds check
32// out of the loop, by doing something like
33//   _, _ = x[len(z)-1], y[len(z)-1]
34// There are other ways to hoist the bounds check out of the loop,
35// but the compiler's BCE isn't powerful enough for them (yet?).
36// See the discussion in CL 164966.
37
38// ----------------------------------------------------------------------------
39// Elementary operations on words
40//
41// These operations are used by the vector operations below.
42
43// z1<<_W + z0 = x*y
44func mulWW_g(x, y Word) (z1, z0 Word) {
45	hi, lo := bits.Mul(uint(x), uint(y))
46	return Word(hi), Word(lo)
47}
48
49// z1<<_W + z0 = x*y + c
50func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
51	hi, lo := bits.Mul(uint(x), uint(y))
52	var cc uint
53	lo, cc = bits.Add(lo, uint(c), 0)
54	return Word(hi + cc), Word(lo)
55}
56
57// nlz returns the number of leading zeros in x.
58// Wraps bits.LeadingZeros call for convenience.
59func nlz(x Word) uint {
60	return uint(bits.LeadingZeros(uint(x)))
61}
62
63// q = (u1<<_W + u0 - r)/v
64func divWW_g(u1, u0, v Word) (q, r Word) {
65	qq, rr := bits.Div(uint(u1), uint(u0), uint(v))
66	return Word(qq), Word(rr)
67}
68
69// The resulting carry c is either 0 or 1.
70func addVV_g(z, x, y []Word) (c Word) {
71	// The comment near the top of this file discusses this for loop condition.
72	for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
73		zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c))
74		z[i] = Word(zi)
75		c = Word(cc)
76	}
77	return
78}
79
80// The resulting carry c is either 0 or 1.
81func subVV_g(z, x, y []Word) (c Word) {
82	// The comment near the top of this file discusses this for loop condition.
83	for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
84		zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c))
85		z[i] = Word(zi)
86		c = Word(cc)
87	}
88	return
89}
90
91// The resulting carry c is either 0 or 1.
92func addVW_g(z, x []Word, y Word) (c Word) {
93	c = y
94	// The comment near the top of this file discusses this for loop condition.
95	for i := 0; i < len(z) && i < len(x); i++ {
96		zi, cc := bits.Add(uint(x[i]), uint(c), 0)
97		z[i] = Word(zi)
98		c = Word(cc)
99	}
100	return
101}
102
103// addVWlarge is addVW, but intended for large z.
104// The only difference is that we check on every iteration
105// whether we are done with carries,
106// and if so, switch to a much faster copy instead.
107// This is only a good idea for large z,
108// because the overhead of the check and the function call
109// outweigh the benefits when z is small.
110func addVWlarge(z, x []Word, y Word) (c Word) {
111	c = y
112	// The comment near the top of this file discusses this for loop condition.
113	for i := 0; i < len(z) && i < len(x); i++ {
114		if c == 0 {
115			copy(z[i:], x[i:])
116			return
117		}
118		zi, cc := bits.Add(uint(x[i]), uint(c), 0)
119		z[i] = Word(zi)
120		c = Word(cc)
121	}
122	return
123}
124
125func subVW_g(z, x []Word, y Word) (c Word) {
126	c = y
127	// The comment near the top of this file discusses this for loop condition.
128	for i := 0; i < len(z) && i < len(x); i++ {
129		zi, cc := bits.Sub(uint(x[i]), uint(c), 0)
130		z[i] = Word(zi)
131		c = Word(cc)
132	}
133	return
134}
135
136// subVWlarge is to subVW as addVWlarge is to addVW.
137func subVWlarge(z, x []Word, y Word) (c Word) {
138	c = y
139	// The comment near the top of this file discusses this for loop condition.
140	for i := 0; i < len(z) && i < len(x); i++ {
141		if c == 0 {
142			copy(z[i:], x[i:])
143			return
144		}
145		zi, cc := bits.Sub(uint(x[i]), uint(c), 0)
146		z[i] = Word(zi)
147		c = Word(cc)
148	}
149	return
150}
151
152func shlVU_g(z, x []Word, s uint) (c Word) {
153	if s == 0 {
154		copy(z, x)
155		return
156	}
157	if len(z) == 0 {
158		return
159	}
160	s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
161	ŝ := _W - s
162	ŝ &= _W - 1 // ditto
163	c = x[len(z)-1] >> ŝ
164	for i := len(z) - 1; i > 0; i-- {
165		z[i] = x[i]<<s | x[i-1]>>ŝ
166	}
167	z[0] = x[0] << s
168	return
169}
170
171func shrVU_g(z, x []Word, s uint) (c Word) {
172	if s == 0 {
173		copy(z, x)
174		return
175	}
176	if len(z) == 0 {
177		return
178	}
179	s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
180	ŝ := _W - s
181	ŝ &= _W - 1 // ditto
182	c = x[0] << ŝ
183	for i := 0; i < len(z)-1; i++ {
184		z[i] = x[i]>>s | x[i+1]<<ŝ
185	}
186	z[len(z)-1] = x[len(z)-1] >> s
187	return
188}
189
190func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
191	c = r
192	// The comment near the top of this file discusses this for loop condition.
193	for i := 0; i < len(z) && i < len(x); i++ {
194		c, z[i] = mulAddWWW_g(x[i], y, c)
195	}
196	return
197}
198
199func addMulVVW_g(z, x []Word, y Word) (c Word) {
200	// The comment near the top of this file discusses this for loop condition.
201	for i := 0; i < len(z) && i < len(x); i++ {
202		z1, z0 := mulAddWWW_g(x[i], y, z[i])
203		lo, cc := bits.Add(uint(z0), uint(c), 0)
204		c, z[i] = Word(cc), Word(lo)
205		c += z1
206	}
207	return
208}
209
210func divWVW_g(z []Word, xn Word, x []Word, y Word) (r Word) {
211	r = xn
212	for i := len(z) - 1; i >= 0; i-- {
213		z[i], r = divWW_g(r, x[i], y)
214	}
215	return
216}
217