1// Copyright 2016 The Snappy-Go Authors. All rights reserved.
2// Copyright (c) 2019 Klaus Post. All rights reserved.
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file.
5
6package s2
7
8import (
9	"math/bits"
10)
11
// hash4 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4(u uint64, h uint8) uint32 {
	const prime4bytes = 2654435761
	// Multiply the low 32 bits by a Knuth-style prime, then take the top h bits.
	mixed := uint32(u) * prime4bytes
	return mixed >> ((32 - h) & 31)
}
18
// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash5(u uint64, h uint8) uint32 {
	const prime5bytes = 889523592379
	// Shift the low 40 bits to the top of the word so the bits above the
	// fifth byte cannot influence the product, then mix and keep h top bits.
	top := u << 24
	return uint32((top * prime5bytes) >> ((64 - h) & 63))
}
25
// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
	const prime7bytes = 58295818150454627
	// Align the low 56 bits to the top so byte 8 is discarded, then mix.
	top := u << 8
	return uint32((top * prime7bytes) >> ((64 - h) & 63))
}
32
// hash8 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash8(u uint64, h uint8) uint32 {
	const prime8bytes = 0xcf1bbcdcb7a56463
	// All 8 bytes participate; multiply then keep the top h bits.
	mixed := u * prime8bytes
	return uint32(mixed >> ((64 - h) & 63))
}
39
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//	len(dst) >= MaxEncodedLen(len(src)) &&
// 	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
//
// It returns the number of bytes written to dst, or 0 if the input could not
// be compressed below dstLimit (callers then store the block uncompressed).
func encodeBlockBetterGo(dst, src []byte) (d int) {
	// Initialize the hash tables.
	const (
		// Long hash matches: hash of 7 bytes, 2^16 entries.
		lTableBits    = 16
		maxLTableSize = 1 << lTableBits

		// Short hash matches: hash of 4 bytes, 2^14 entries.
		sTableBits    = 14
		maxSTableSize = 1 << sTableBits
	)

	// Table entries hold the src position where a given hash was last seen.
	// The zero value aliases position 0, which is why matching starts at s == 1.
	var lTable [maxLTableSize]uint32
	var sTable [maxSTableSize]uint32

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Bail if we can't compress to at least this.
	// This demands roughly >3% savings plus a small constant.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We initialize repeat to 0, so we never match on first attempt
	repeat := 0

	for {
		candidateL := 0
		nextS := 0
		// Inner loop: advance s until a usable match candidate is found.
		for {
			// Next src position to check.
			// Snappy-style skipping: the longer the current literal run
			// (s - nextEmit), the more bytes we skip per probe.
			nextS = s + (s-nextEmit)>>7 + 1
			if nextS > sLimit {
				goto emitRemainder
			}
			// Probe both tables with the current 8 bytes, then record s.
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			// Check repeat at offset checkRep.
			const checkRep = 1
			// NOTE(review): this branch is deliberately disabled via `false &&`;
			// the body is kept for reference/experimentation. Repeats are
			// instead detected below when a found match has offset == repeat.
			if false && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward, 8 bytes at a time using a 64-bit XOR probe;
				// TrailingZeros64>>3 converts the first differing bit to bytes.
				candidate := s - repeat + 4 + checkRep
				s += 4 + checkRep
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				if nextEmit > 0 {
					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
					d += emitRepeat(dst[d:], repeat, s-base)
				} else {
					// First match, cannot be repeat.
					d += emitCopy(dst[d:], repeat, s-base)
				}
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}

				cv = load64(src, s)
				continue
			}

			// Long-hash candidate: accept if the 4-byte prefixes agree.
			if uint32(cv) == load32(src, candidateL) {
				break
			}

			// Check our short candidate
			if uint32(cv) == load32(src, candidateS) {
				// Try a long candidate at s+1 first; a long-table match
				// tends to give a closer offset / longer match.
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards: grow the match into pending literals while the
		// preceding bytes also agree.
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s <= len(src)-8 {
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		// A short match with a >2-byte offset needs a long copy encoding;
		// if that costs as much as the bytes it replaces, skip it
		// (unless it is a repeat, which encodes the offset for free).
		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal or worse to the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		// Flush pending literals, then the copy. Matching the previous
		// offset is emitted as a repeat, which omits re-encoding the offset.
		d += emitLiteral(dst[d:], src[nextEmit:base])
		if repeat == offset {
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}
		// Seed the tables around both ends of the match so future probes
		// can find positions inside/near it.
		// Index match start+1 (long) and start+2 (short)
		index0 := base + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		cv = load64(src, s)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1)
		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
		sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
	}

emitRemainder:
	// Emit any trailing bytes as one final literal run.
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
241