1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This file provides Go implementations of elementary multi-precision
6// arithmetic operations on word vectors. Needed for platforms without
7// assembly implementations of these routines.
8
9package big
10
11// A Word represents a single digit of a multi-precision unsigned integer.
12type Word uintptr
13
14const (
15	// Compute the size _S of a Word in bytes.
16	_m    = ^Word(0)
17	_logS = _m>>8&1 + _m>>16&1 + _m>>32&1
18	_S    = 1 << _logS
19
20	_W = _S << 3 // word size in bits
21	_B = 1 << _W // digit base
22	_M = _B - 1  // digit mask
23
24	_W2 = _W / 2   // half word size in bits
25	_B2 = 1 << _W2 // half digit base
26	_M2 = _B2 - 1  // half digit mask
27)
28
29// ----------------------------------------------------------------------------
30// Elementary operations on words
31//
32// These operations are used by the vector operations below.
33
34// z1<<_W + z0 = x+y+c, with c == 0 or 1
35func addWW_g(x, y, c Word) (z1, z0 Word) {
36	yc := y + c
37	z0 = x + yc
38	if z0 < x || yc < y {
39		z1 = 1
40	}
41	return
42}
43
44// z1<<_W + z0 = x-y-c, with c == 0 or 1
45func subWW_g(x, y, c Word) (z1, z0 Word) {
46	yc := y + c
47	z0 = x - yc
48	if z0 > x || yc < y {
49		z1 = 1
50	}
51	return
52}
53
54// z1<<_W + z0 = x*y
55// Adapted from Warren, Hacker's Delight, p. 132.
56func mulWW_g(x, y Word) (z1, z0 Word) {
57	x0 := x & _M2
58	x1 := x >> _W2
59	y0 := y & _M2
60	y1 := y >> _W2
61	w0 := x0 * y0
62	t := x1*y0 + w0>>_W2
63	w1 := t & _M2
64	w2 := t >> _W2
65	w1 += x0 * y1
66	z1 = x1*y1 + w2 + w1>>_W2
67	z0 = x * y
68	return
69}
70
71// z1<<_W + z0 = x*y + c
72func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
73	z1, zz0 := mulWW_g(x, y)
74	if z0 = zz0 + c; z0 < zz0 {
75		z1++
76	}
77	return
78}
79
80// Length of x in bits.
81func bitLen_g(x Word) (n int) {
82	for ; x >= 0x8000; x >>= 16 {
83		n += 16
84	}
85	if x >= 0x80 {
86		x >>= 8
87		n += 8
88	}
89	if x >= 0x8 {
90		x >>= 4
91		n += 4
92	}
93	if x >= 0x2 {
94		x >>= 2
95		n += 2
96	}
97	if x >= 0x1 {
98		n++
99	}
100	return
101}
102
103// log2 computes the integer binary logarithm of x.
104// The result is the integer n for which 2^n <= x < 2^(n+1).
105// If x == 0, the result is -1.
106func log2(x Word) int {
107	return bitLen(x) - 1
108}
109
110// nlz returns the number of leading zeros in x.
111func nlz(x Word) uint {
112	return uint(_W - bitLen(x))
113}
114
115// nlz64 returns the number of leading zeros in x.
116func nlz64(x uint64) uint {
117	switch _W {
118	case 32:
119		w := x >> 32
120		if w == 0 {
121			return 32 + nlz(Word(x))
122		}
123		return nlz(Word(w))
124	case 64:
125		return nlz(Word(x))
126	}
127	panic("unreachable")
128}
129
130// q = (u1<<_W + u0 - r)/y
131// Adapted from Warren, Hacker's Delight, p. 152.
132func divWW_g(u1, u0, v Word) (q, r Word) {
133	if u1 >= v {
134		return 1<<_W - 1, 1<<_W - 1
135	}
136
137	s := nlz(v)
138	v <<= s
139
140	vn1 := v >> _W2
141	vn0 := v & _M2
142	un32 := u1<<s | u0>>(_W-s)
143	un10 := u0 << s
144	un1 := un10 >> _W2
145	un0 := un10 & _M2
146	q1 := un32 / vn1
147	rhat := un32 - q1*vn1
148
149	for q1 >= _B2 || q1*vn0 > _B2*rhat+un1 {
150		q1--
151		rhat += vn1
152		if rhat >= _B2 {
153			break
154		}
155	}
156
157	un21 := un32*_B2 + un1 - q1*v
158	q0 := un21 / vn1
159	rhat = un21 - q0*vn1
160
161	for q0 >= _B2 || q0*vn0 > _B2*rhat+un0 {
162		q0--
163		rhat += vn1
164		if rhat >= _B2 {
165			break
166		}
167	}
168
169	return q1*_B2 + q0, (un21*_B2 + un0 - q0*v) >> s
170}
171
172// Keep for performance debugging.
173// Using addWW_g is likely slower.
174const use_addWW_g = false
175
176// The resulting carry c is either 0 or 1.
177func addVV_g(z, x, y []Word) (c Word) {
178	if use_addWW_g {
179		for i := range z {
180			c, z[i] = addWW_g(x[i], y[i], c)
181		}
182		return
183	}
184
185	for i, xi := range x[:len(z)] {
186		yi := y[i]
187		zi := xi + yi + c
188		z[i] = zi
189		// see "Hacker's Delight", section 2-12 (overflow detection)
190		c = (xi&yi | (xi|yi)&^zi) >> (_W - 1)
191	}
192	return
193}
194
195// The resulting carry c is either 0 or 1.
196func subVV_g(z, x, y []Word) (c Word) {
197	if use_addWW_g {
198		for i := range z {
199			c, z[i] = subWW_g(x[i], y[i], c)
200		}
201		return
202	}
203
204	for i, xi := range x[:len(z)] {
205		yi := y[i]
206		zi := xi - yi - c
207		z[i] = zi
208		// see "Hacker's Delight", section 2-12 (overflow detection)
209		c = (yi&^xi | (yi|^xi)&zi) >> (_W - 1)
210	}
211	return
212}
213
214// The resulting carry c is either 0 or 1.
215func addVW_g(z, x []Word, y Word) (c Word) {
216	if use_addWW_g {
217		c = y
218		for i := range z {
219			c, z[i] = addWW_g(x[i], c, 0)
220		}
221		return
222	}
223
224	c = y
225	for i, xi := range x[:len(z)] {
226		zi := xi + c
227		z[i] = zi
228		c = xi &^ zi >> (_W - 1)
229	}
230	return
231}
232
233func subVW_g(z, x []Word, y Word) (c Word) {
234	if use_addWW_g {
235		c = y
236		for i := range z {
237			c, z[i] = subWW_g(x[i], c, 0)
238		}
239		return
240	}
241
242	c = y
243	for i, xi := range x[:len(z)] {
244		zi := xi - c
245		z[i] = zi
246		c = (zi &^ xi) >> (_W - 1)
247	}
248	return
249}
250
251func shlVU_g(z, x []Word, s uint) (c Word) {
252	if n := len(z); n > 0 {
253		ŝ := _W - s
254		w1 := x[n-1]
255		c = w1 >> ŝ
256		for i := n - 1; i > 0; i-- {
257			w := w1
258			w1 = x[i-1]
259			z[i] = w<<s | w1>>ŝ
260		}
261		z[0] = w1 << s
262	}
263	return
264}
265
266func shrVU_g(z, x []Word, s uint) (c Word) {
267	if n := len(z); n > 0 {
268		ŝ := _W - s
269		w1 := x[0]
270		c = w1 << ŝ
271		for i := 0; i < n-1; i++ {
272			w := w1
273			w1 = x[i+1]
274			z[i] = w>>s | w1<<ŝ
275		}
276		z[n-1] = w1 >> s
277	}
278	return
279}
280
281func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
282	c = r
283	for i := range z {
284		c, z[i] = mulAddWWW_g(x[i], y, c)
285	}
286	return
287}
288
289// TODO(gri) Remove use of addWW_g here and then we can remove addWW_g and subWW_g.
290func addMulVVW_g(z, x []Word, y Word) (c Word) {
291	for i := range z {
292		z1, z0 := mulAddWWW_g(x[i], y, z[i])
293		c, z[i] = addWW_g(z0, c, 0)
294		c += z1
295	}
296	return
297}
298
299func divWVW_g(z []Word, xn Word, x []Word, y Word) (r Word) {
300	r = xn
301	for i := len(z) - 1; i >= 0; i-- {
302		z[i], r = divWW_g(r, x[i], y)
303	}
304	return
305}
306