1// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described
2// at http://cyan4973.github.io/xxHash/.
3package xxhash
4
5import (
6	"encoding/binary"
7	"errors"
8	"math/bits"
9)
10
11const (
12	prime1 uint64 = 11400714785074694791
13	prime2 uint64 = 14029467366897019727
14	prime3 uint64 = 1609587929392839161
15	prime4 uint64 = 9650029242287828579
16	prime5 uint64 = 2870177450012600261
17)
18
19// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
20// possible in the Go code is worth a small (but measurable) performance boost
21// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
22// convenience in the Go code in a few places where we need to intentionally
23// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
24// result overflows a uint64).
25var (
26	prime1v = prime1
27	prime2v = prime2
28	prime3v = prime3
29	prime4v = prime4
30	prime5v = prime5
31)
32
33// Digest implements hash.Hash64.
34type Digest struct {
35	v1    uint64
36	v2    uint64
37	v3    uint64
38	v4    uint64
39	total uint64
40	mem   [32]byte
41	n     int // how much of mem is used
42}
43
44// New creates a new Digest that computes the 64-bit xxHash algorithm.
45func New() *Digest {
46	var d Digest
47	d.Reset()
48	return &d
49}
50
51// Reset clears the Digest's state so that it can be reused.
52func (d *Digest) Reset() {
53	d.v1 = prime1v + prime2
54	d.v2 = prime2
55	d.v3 = 0
56	d.v4 = -prime1v
57	d.total = 0
58	d.n = 0
59}
60
61// Size always returns 8 bytes.
62func (d *Digest) Size() int { return 8 }
63
64// BlockSize always returns 32 bytes.
65func (d *Digest) BlockSize() int { return 32 }
66
67// Write adds more data to d. It always returns len(b), nil.
68func (d *Digest) Write(b []byte) (n int, err error) {
69	n = len(b)
70	d.total += uint64(n)
71
72	if d.n+n < 32 {
73		// This new data doesn't even fill the current block.
74		copy(d.mem[d.n:], b)
75		d.n += n
76		return
77	}
78
79	if d.n > 0 {
80		// Finish off the partial block.
81		copy(d.mem[d.n:], b)
82		d.v1 = round(d.v1, u64(d.mem[0:8]))
83		d.v2 = round(d.v2, u64(d.mem[8:16]))
84		d.v3 = round(d.v3, u64(d.mem[16:24]))
85		d.v4 = round(d.v4, u64(d.mem[24:32]))
86		b = b[32-d.n:]
87		d.n = 0
88	}
89
90	if len(b) >= 32 {
91		// One or more full blocks left.
92		nw := writeBlocks(d, b)
93		b = b[nw:]
94	}
95
96	// Store any remaining partial block.
97	copy(d.mem[:], b)
98	d.n = len(b)
99
100	return
101}
102
103// Sum appends the current hash to b and returns the resulting slice.
104func (d *Digest) Sum(b []byte) []byte {
105	s := d.Sum64()
106	return append(
107		b,
108		byte(s>>56),
109		byte(s>>48),
110		byte(s>>40),
111		byte(s>>32),
112		byte(s>>24),
113		byte(s>>16),
114		byte(s>>8),
115		byte(s),
116	)
117}
118
119// Sum64 returns the current hash.
120func (d *Digest) Sum64() uint64 {
121	var h uint64
122
123	if d.total >= 32 {
124		v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4
125		h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
126		h = mergeRound(h, v1)
127		h = mergeRound(h, v2)
128		h = mergeRound(h, v3)
129		h = mergeRound(h, v4)
130	} else {
131		h = d.v3 + prime5
132	}
133
134	h += d.total
135
136	i, end := 0, d.n
137	for ; i+8 <= end; i += 8 {
138		k1 := round(0, u64(d.mem[i:i+8]))
139		h ^= k1
140		h = rol27(h)*prime1 + prime4
141	}
142	if i+4 <= end {
143		h ^= uint64(u32(d.mem[i:i+4])) * prime1
144		h = rol23(h)*prime2 + prime3
145		i += 4
146	}
147	for i < end {
148		h ^= uint64(d.mem[i]) * prime5
149		h = rol11(h) * prime1
150		i++
151	}
152
153	h ^= h >> 33
154	h *= prime2
155	h ^= h >> 29
156	h *= prime3
157	h ^= h >> 32
158
159	return h
160}
161
162const (
163	magic         = "xxh\x06"
164	marshaledSize = len(magic) + 8*5 + 32
165)
166
167// MarshalBinary implements the encoding.BinaryMarshaler interface.
168func (d *Digest) MarshalBinary() ([]byte, error) {
169	b := make([]byte, 0, marshaledSize)
170	b = append(b, magic...)
171	b = appendUint64(b, d.v1)
172	b = appendUint64(b, d.v2)
173	b = appendUint64(b, d.v3)
174	b = appendUint64(b, d.v4)
175	b = appendUint64(b, d.total)
176	b = append(b, d.mem[:d.n]...)
177	b = b[:len(b)+len(d.mem)-d.n]
178	return b, nil
179}
180
181// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface.
182func (d *Digest) UnmarshalBinary(b []byte) error {
183	if len(b) < len(magic) || string(b[:len(magic)]) != magic {
184		return errors.New("xxhash: invalid hash state identifier")
185	}
186	if len(b) != marshaledSize {
187		return errors.New("xxhash: invalid hash state size")
188	}
189	b = b[len(magic):]
190	b, d.v1 = consumeUint64(b)
191	b, d.v2 = consumeUint64(b)
192	b, d.v3 = consumeUint64(b)
193	b, d.v4 = consumeUint64(b)
194	b, d.total = consumeUint64(b)
195	copy(d.mem[:], b)
196	b = b[len(d.mem):]
197	d.n = int(d.total % uint64(len(d.mem)))
198	return nil
199}
200
201func appendUint64(b []byte, x uint64) []byte {
202	var a [8]byte
203	binary.LittleEndian.PutUint64(a[:], x)
204	return append(b, a[:]...)
205}
206
207func consumeUint64(b []byte) ([]byte, uint64) {
208	x := u64(b)
209	return b[8:], x
210}
211
212func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) }
213func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) }
214
215func round(acc, input uint64) uint64 {
216	acc += input * prime2
217	acc = rol31(acc)
218	acc *= prime1
219	return acc
220}
221
222func mergeRound(acc, val uint64) uint64 {
223	val = round(0, val)
224	acc ^= val
225	acc = acc*prime1 + prime4
226	return acc
227}
228
229func rol1(x uint64) uint64  { return bits.RotateLeft64(x, 1) }
230func rol7(x uint64) uint64  { return bits.RotateLeft64(x, 7) }
231func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) }
232func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) }
233func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) }
234func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) }
235func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) }
236func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) }
237