1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This file provides Go implementations of elementary multi-precision
6// arithmetic operations on word vectors. Needed for platforms without
7// assembly implementations of these routines.
8
9package big
10
11import "math/bits"
12
13// A Word represents a single digit of a multi-precision unsigned integer.
14type Word uint
15
16const (
17	_S = _W / 8 // word size in bytes
18
19	_W = bits.UintSize // word size in bits
20	_B = 1 << _W       // digit base
21	_M = _B - 1        // digit mask
22
23	_W2 = _W / 2   // half word size in bits
24	_B2 = 1 << _W2 // half digit base
25	_M2 = _B2 - 1  // half digit mask
26)
27
28// ----------------------------------------------------------------------------
29// Elementary operations on words
30//
31// These operations are used by the vector operations below.
32
33// z1<<_W + z0 = x+y+c, with c == 0 or 1
34func addWW_g(x, y, c Word) (z1, z0 Word) {
35	yc := y + c
36	z0 = x + yc
37	if z0 < x || yc < y {
38		z1 = 1
39	}
40	return
41}
42
43// z1<<_W + z0 = x-y-c, with c == 0 or 1
44func subWW_g(x, y, c Word) (z1, z0 Word) {
45	yc := y + c
46	z0 = x - yc
47	if z0 > x || yc < y {
48		z1 = 1
49	}
50	return
51}
52
53// z1<<_W + z0 = x*y
54// Adapted from Warren, Hacker's Delight, p. 132.
55func mulWW_g(x, y Word) (z1, z0 Word) {
56	x0 := x & _M2
57	x1 := x >> _W2
58	y0 := y & _M2
59	y1 := y >> _W2
60	w0 := x0 * y0
61	t := x1*y0 + w0>>_W2
62	w1 := t & _M2
63	w2 := t >> _W2
64	w1 += x0 * y1
65	z1 = x1*y1 + w2 + w1>>_W2
66	z0 = x * y
67	return
68}
69
70// z1<<_W + z0 = x*y + c
71func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
72	z1, zz0 := mulWW_g(x, y)
73	if z0 = zz0 + c; z0 < zz0 {
74		z1++
75	}
76	return
77}
78
79// nlz returns the number of leading zeros in x.
80// Wraps bits.LeadingZeros call for convenience.
81func nlz(x Word) uint {
82	return uint(bits.LeadingZeros(uint(x)))
83}
84
85// q = (u1<<_W + u0 - r)/y
86// Adapted from Warren, Hacker's Delight, p. 152.
87func divWW_g(u1, u0, v Word) (q, r Word) {
88	if u1 >= v {
89		return 1<<_W - 1, 1<<_W - 1
90	}
91
92	s := nlz(v)
93	v <<= s
94
95	vn1 := v >> _W2
96	vn0 := v & _M2
97	un32 := u1<<s | u0>>(_W-s)
98	un10 := u0 << s
99	un1 := un10 >> _W2
100	un0 := un10 & _M2
101	q1 := un32 / vn1
102	rhat := un32 - q1*vn1
103
104	for q1 >= _B2 || q1*vn0 > _B2*rhat+un1 {
105		q1--
106		rhat += vn1
107		if rhat >= _B2 {
108			break
109		}
110	}
111
112	un21 := un32*_B2 + un1 - q1*v
113	q0 := un21 / vn1
114	rhat = un21 - q0*vn1
115
116	for q0 >= _B2 || q0*vn0 > _B2*rhat+un0 {
117		q0--
118		rhat += vn1
119		if rhat >= _B2 {
120			break
121		}
122	}
123
124	return q1*_B2 + q0, (un21*_B2 + un0 - q0*v) >> s
125}
126
127// Keep for performance debugging.
128// Using addWW_g is likely slower.
129const use_addWW_g = false
130
131// The resulting carry c is either 0 or 1.
132func addVV_g(z, x, y []Word) (c Word) {
133	if use_addWW_g {
134		for i := range z {
135			c, z[i] = addWW_g(x[i], y[i], c)
136		}
137		return
138	}
139
140	for i, xi := range x[:len(z)] {
141		yi := y[i]
142		zi := xi + yi + c
143		z[i] = zi
144		// see "Hacker's Delight", section 2-12 (overflow detection)
145		c = (xi&yi | (xi|yi)&^zi) >> (_W - 1)
146	}
147	return
148}
149
150// The resulting carry c is either 0 or 1.
151func subVV_g(z, x, y []Word) (c Word) {
152	if use_addWW_g {
153		for i := range z {
154			c, z[i] = subWW_g(x[i], y[i], c)
155		}
156		return
157	}
158
159	for i, xi := range x[:len(z)] {
160		yi := y[i]
161		zi := xi - yi - c
162		z[i] = zi
163		// see "Hacker's Delight", section 2-12 (overflow detection)
164		c = (yi&^xi | (yi|^xi)&zi) >> (_W - 1)
165	}
166	return
167}
168
169// The resulting carry c is either 0 or 1.
170func addVW_g(z, x []Word, y Word) (c Word) {
171	if use_addWW_g {
172		c = y
173		for i := range z {
174			c, z[i] = addWW_g(x[i], c, 0)
175		}
176		return
177	}
178
179	c = y
180	for i, xi := range x[:len(z)] {
181		zi := xi + c
182		z[i] = zi
183		c = xi &^ zi >> (_W - 1)
184	}
185	return
186}
187
188func subVW_g(z, x []Word, y Word) (c Word) {
189	if use_addWW_g {
190		c = y
191		for i := range z {
192			c, z[i] = subWW_g(x[i], c, 0)
193		}
194		return
195	}
196
197	c = y
198	for i, xi := range x[:len(z)] {
199		zi := xi - c
200		z[i] = zi
201		c = (zi &^ xi) >> (_W - 1)
202	}
203	return
204}
205
206func shlVU_g(z, x []Word, s uint) (c Word) {
207	if n := len(z); n > 0 {
208		ŝ := _W - s
209		w1 := x[n-1]
210		c = w1 >> ŝ
211		for i := n - 1; i > 0; i-- {
212			w := w1
213			w1 = x[i-1]
214			z[i] = w<<s | w1>>ŝ
215		}
216		z[0] = w1 << s
217	}
218	return
219}
220
221func shrVU_g(z, x []Word, s uint) (c Word) {
222	if n := len(z); n > 0 {
223		ŝ := _W - s
224		w1 := x[0]
225		c = w1 << ŝ
226		for i := 0; i < n-1; i++ {
227			w := w1
228			w1 = x[i+1]
229			z[i] = w>>s | w1<<ŝ
230		}
231		z[n-1] = w1 >> s
232	}
233	return
234}
235
236func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
237	c = r
238	for i := range z {
239		c, z[i] = mulAddWWW_g(x[i], y, c)
240	}
241	return
242}
243
244// TODO(gri) Remove use of addWW_g here and then we can remove addWW_g and subWW_g.
245func addMulVVW_g(z, x []Word, y Word) (c Word) {
246	for i := range z {
247		z1, z0 := mulAddWWW_g(x[i], y, z[i])
248		c, z[i] = addWW_g(z0, c, 0)
249		c += z1
250	}
251	return
252}
253
254func divWVW_g(z []Word, xn Word, x []Word, y Word) (r Word) {
255	r = xn
256	for i := len(z) - 1; i >= 0; i-- {
257		z[i], r = divWW_g(r, x[i], y)
258	}
259	return
260}
261