1// Copyright 2016 The Snappy-Go Authors. All rights reserved.
2// Copyright (c) 2019 Klaus Post. All rights reserved.
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file.
5
6package s2
7
8import (
9	"bytes"
10	"encoding/binary"
11	"math/bits"
12)
13
14func load32(b []byte, i int) uint32 {
15	return binary.LittleEndian.Uint32(b[i:])
16}
17
18func load64(b []byte, i int) uint64 {
19	return binary.LittleEndian.Uint64(b[i:])
20}
21
22// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
23// Preferably h should be a constant and should always be <64.
24func hash6(u uint64, h uint8) uint32 {
25	const prime6bytes = 227718039650203
26	return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
27}
28
29func encodeGo(dst, src []byte) []byte {
30	if n := MaxEncodedLen(len(src)); n < 0 {
31		panic(ErrTooLarge)
32	} else if len(dst) < n {
33		dst = make([]byte, n)
34	}
35
36	// The block starts with the varint-encoded length of the decompressed bytes.
37	d := binary.PutUvarint(dst, uint64(len(src)))
38
39	if len(src) == 0 {
40		return dst[:d]
41	}
42	if len(src) < minNonLiteralBlockSize {
43		d += emitLiteral(dst[d:], src)
44		return dst[:d]
45	}
46	n := encodeBlockGo(dst[d:], src)
47	if n > 0 {
48		d += n
49		return dst[:d]
50	}
51	// Not compressible
52	d += emitLiteral(dst[d:], src)
53	return dst[:d]
54}
55
56// encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
57// assumes that the varint-encoded length of the decompressed bytes has already
58// been written.
59//
60// It also assumes that:
61//	len(dst) >= MaxEncodedLen(len(src)) &&
62// 	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
63func encodeBlockGo(dst, src []byte) (d int) {
64	// Initialize the hash table.
65	const (
66		tableBits    = 14
67		maxTableSize = 1 << tableBits
68
69		debug = false
70	)
71
72	var table [maxTableSize]uint32
73
74	// sLimit is when to stop looking for offset/length copies. The inputMargin
75	// lets us use a fast path for emitLiteral in the main loop, while we are
76	// looking for copies.
77	sLimit := len(src) - inputMargin
78
79	// Bail if we can't compress to at least this.
80	dstLimit := len(src) - len(src)>>5 - 5
81
82	// nextEmit is where in src the next emitLiteral should start from.
83	nextEmit := 0
84
85	// The encoded form must start with a literal, as there are no previous
86	// bytes to copy, so we start looking for hash matches at s == 1.
87	s := 1
88	cv := load64(src, s)
89
90	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
91	repeat := 1
92
93	for {
94		candidate := 0
95		for {
96			// Next src position to check
97			nextS := s + (s-nextEmit)>>6 + 4
98			if nextS > sLimit {
99				goto emitRemainder
100			}
101			hash0 := hash6(cv, tableBits)
102			hash1 := hash6(cv>>8, tableBits)
103			candidate = int(table[hash0])
104			candidate2 := int(table[hash1])
105			table[hash0] = uint32(s)
106			table[hash1] = uint32(s + 1)
107			hash2 := hash6(cv>>16, tableBits)
108
109			// Check repeat at offset checkRep.
110			const checkRep = 1
111			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
112				base := s + checkRep
113				// Extend back
114				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
115					i--
116					base--
117				}
118				d += emitLiteral(dst[d:], src[nextEmit:base])
119
120				// Extend forward
121				candidate := s - repeat + 4 + checkRep
122				s += 4 + checkRep
123				for s <= sLimit {
124					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
125						s += bits.TrailingZeros64(diff) >> 3
126						break
127					}
128					s += 8
129					candidate += 8
130				}
131				if debug {
132					// Validate match.
133					if s <= candidate {
134						panic("s <= candidate")
135					}
136					a := src[base:s]
137					b := src[base-repeat : base-repeat+(s-base)]
138					if !bytes.Equal(a, b) {
139						panic("mismatch")
140					}
141				}
142				if nextEmit > 0 {
143					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
144					d += emitRepeat(dst[d:], repeat, s-base)
145				} else {
146					// First match, cannot be repeat.
147					d += emitCopy(dst[d:], repeat, s-base)
148				}
149				nextEmit = s
150				if s >= sLimit {
151					goto emitRemainder
152				}
153
154				cv = load64(src, s)
155				continue
156			}
157
158			if uint32(cv) == load32(src, candidate) {
159				break
160			}
161			candidate = int(table[hash2])
162			if uint32(cv>>8) == load32(src, candidate2) {
163				table[hash2] = uint32(s + 2)
164				candidate = candidate2
165				s++
166				break
167			}
168			table[hash2] = uint32(s + 2)
169			if uint32(cv>>16) == load32(src, candidate) {
170				s += 2
171				break
172			}
173
174			cv = load64(src, nextS)
175			s = nextS
176		}
177
178		// Extend backwards.
179		// The top bytes will be rechecked to get the full match.
180		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
181			candidate--
182			s--
183		}
184
185		// Bail if we exceed the maximum size.
186		if d+(s-nextEmit) > dstLimit {
187			return 0
188		}
189
190		// A 4-byte match has been found. We'll later see if more than 4 bytes
191		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
192		// them as literal bytes.
193
194		d += emitLiteral(dst[d:], src[nextEmit:s])
195
196		// Call emitCopy, and then see if another emitCopy could be our next
197		// move. Repeat until we find no match for the input immediately after
198		// what was consumed by the last emitCopy call.
199		//
200		// If we exit this loop normally then we need to call emitLiteral next,
201		// though we don't yet know how big the literal will be. We handle that
202		// by proceeding to the next iteration of the main loop. We also can
203		// exit this loop via goto if we get close to exhausting the input.
204		for {
205			// Invariant: we have a 4-byte match at s, and no need to emit any
206			// literal bytes prior to s.
207			base := s
208			repeat = base - candidate
209
210			// Extend the 4-byte match as long as possible.
211			s += 4
212			candidate += 4
213			for s <= len(src)-8 {
214				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
215					s += bits.TrailingZeros64(diff) >> 3
216					break
217				}
218				s += 8
219				candidate += 8
220			}
221
222			d += emitCopy(dst[d:], repeat, s-base)
223			if debug {
224				// Validate match.
225				if s <= candidate {
226					panic("s <= candidate")
227				}
228				a := src[base:s]
229				b := src[base-repeat : base-repeat+(s-base)]
230				if !bytes.Equal(a, b) {
231					panic("mismatch")
232				}
233			}
234
235			nextEmit = s
236			if s >= sLimit {
237				goto emitRemainder
238			}
239
240			if d > dstLimit {
241				// Do we have space for more, if not bail.
242				return 0
243			}
244			// Check for an immediate match, otherwise start search at s+1
245			x := load64(src, s-2)
246			m2Hash := hash6(x, tableBits)
247			currHash := hash6(x>>16, tableBits)
248			candidate = int(table[currHash])
249			table[m2Hash] = uint32(s - 2)
250			table[currHash] = uint32(s)
251			if debug && s == candidate {
252				panic("s == candidate")
253			}
254			if uint32(x>>16) != load32(src, candidate) {
255				cv = load64(src, s+1)
256				s++
257				break
258			}
259		}
260	}
261
262emitRemainder:
263	if nextEmit < len(src) {
264		// Bail if we exceed the maximum size.
265		if d+len(src)-nextEmit > dstLimit {
266			return 0
267		}
268		d += emitLiteral(dst[d:], src[nextEmit:])
269	}
270	return d
271}
272
273func encodeBlockSnappyGo(dst, src []byte) (d int) {
274	// Initialize the hash table.
275	const (
276		tableBits    = 14
277		maxTableSize = 1 << tableBits
278	)
279
280	var table [maxTableSize]uint32
281
282	// sLimit is when to stop looking for offset/length copies. The inputMargin
283	// lets us use a fast path for emitLiteral in the main loop, while we are
284	// looking for copies.
285	sLimit := len(src) - inputMargin
286
287	// Bail if we can't compress to at least this.
288	dstLimit := len(src) - len(src)>>5 - 5
289
290	// nextEmit is where in src the next emitLiteral should start from.
291	nextEmit := 0
292
293	// The encoded form must start with a literal, as there are no previous
294	// bytes to copy, so we start looking for hash matches at s == 1.
295	s := 1
296	cv := load64(src, s)
297
298	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
299	repeat := 1
300
301	for {
302		candidate := 0
303		for {
304			// Next src position to check
305			nextS := s + (s-nextEmit)>>6 + 4
306			if nextS > sLimit {
307				goto emitRemainder
308			}
309			hash0 := hash6(cv, tableBits)
310			hash1 := hash6(cv>>8, tableBits)
311			candidate = int(table[hash0])
312			candidate2 := int(table[hash1])
313			table[hash0] = uint32(s)
314			table[hash1] = uint32(s + 1)
315			hash2 := hash6(cv>>16, tableBits)
316
317			// Check repeat at offset checkRep.
318			const checkRep = 1
319			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
320				base := s + checkRep
321				// Extend back
322				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
323					i--
324					base--
325				}
326				d += emitLiteral(dst[d:], src[nextEmit:base])
327
328				// Extend forward
329				candidate := s - repeat + 4 + checkRep
330				s += 4 + checkRep
331				for s <= sLimit {
332					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
333						s += bits.TrailingZeros64(diff) >> 3
334						break
335					}
336					s += 8
337					candidate += 8
338				}
339
340				d += emitCopyNoRepeat(dst[d:], repeat, s-base)
341				nextEmit = s
342				if s >= sLimit {
343					goto emitRemainder
344				}
345
346				cv = load64(src, s)
347				continue
348			}
349
350			if uint32(cv) == load32(src, candidate) {
351				break
352			}
353			candidate = int(table[hash2])
354			if uint32(cv>>8) == load32(src, candidate2) {
355				table[hash2] = uint32(s + 2)
356				candidate = candidate2
357				s++
358				break
359			}
360			table[hash2] = uint32(s + 2)
361			if uint32(cv>>16) == load32(src, candidate) {
362				s += 2
363				break
364			}
365
366			cv = load64(src, nextS)
367			s = nextS
368		}
369
370		// Extend backwards
371		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
372			candidate--
373			s--
374		}
375
376		// Bail if we exceed the maximum size.
377		if d+(s-nextEmit) > dstLimit {
378			return 0
379		}
380
381		// A 4-byte match has been found. We'll later see if more than 4 bytes
382		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
383		// them as literal bytes.
384
385		d += emitLiteral(dst[d:], src[nextEmit:s])
386
387		// Call emitCopy, and then see if another emitCopy could be our next
388		// move. Repeat until we find no match for the input immediately after
389		// what was consumed by the last emitCopy call.
390		//
391		// If we exit this loop normally then we need to call emitLiteral next,
392		// though we don't yet know how big the literal will be. We handle that
393		// by proceeding to the next iteration of the main loop. We also can
394		// exit this loop via goto if we get close to exhausting the input.
395		for {
396			// Invariant: we have a 4-byte match at s, and no need to emit any
397			// literal bytes prior to s.
398			base := s
399			repeat = base - candidate
400
401			// Extend the 4-byte match as long as possible.
402			s += 4
403			candidate += 4
404			for s <= len(src)-8 {
405				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
406					s += bits.TrailingZeros64(diff) >> 3
407					break
408				}
409				s += 8
410				candidate += 8
411			}
412
413			d += emitCopyNoRepeat(dst[d:], repeat, s-base)
414			if false {
415				// Validate match.
416				a := src[base:s]
417				b := src[base-repeat : base-repeat+(s-base)]
418				if !bytes.Equal(a, b) {
419					panic("mismatch")
420				}
421			}
422
423			nextEmit = s
424			if s >= sLimit {
425				goto emitRemainder
426			}
427
428			if d > dstLimit {
429				// Do we have space for more, if not bail.
430				return 0
431			}
432			// Check for an immediate match, otherwise start search at s+1
433			x := load64(src, s-2)
434			m2Hash := hash6(x, tableBits)
435			currHash := hash6(x>>16, tableBits)
436			candidate = int(table[currHash])
437			table[m2Hash] = uint32(s - 2)
438			table[currHash] = uint32(s)
439			if uint32(x>>16) != load32(src, candidate) {
440				cv = load64(src, s+1)
441				s++
442				break
443			}
444		}
445	}
446
447emitRemainder:
448	if nextEmit < len(src) {
449		// Bail if we exceed the maximum size.
450		if d+len(src)-nextEmit > dstLimit {
451			return 0
452		}
453		d += emitLiteral(dst[d:], src[nextEmit:])
454	}
455	return d
456}
457