1// Copyright 2019+ Klaus Post. All rights reserved.
2// License information can be found in the LICENSE file.
3// Based on work by Yann Collet, released under BSD License.
4
5package zstd
6
7import (
8	"fmt"
9	"math/bits"
10)
11
12const (
13	bestLongTableBits = 20                     // Bits used in the long match table
14	bestLongTableSize = 1 << bestLongTableBits // Size of the table
15
16	// Note: Increasing the short table bits or making the hash shorter
17	// can actually lead to compression degradation since it will 'steal' more from the
18	// long match table and match offsets are quite big.
19	// This greatly depends on the type of input.
20	bestShortTableBits = 16                      // Bits used in the short match table
21	bestShortTableSize = 1 << bestShortTableBits // Size of the table
22)
23
24// bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
25// The long match table contains the previous entry with the same hash,
26// effectively making it a "chain" of length 2.
27// When we find a long match we choose between the two values and select the longest.
28// When we find a short match, after checking the long, we check if we can find a long at n+1
29// and that it is longer (lazy matching).
30type bestFastEncoder struct {
31	fastBase
32	table         [bestShortTableSize]prevEntry
33	longTable     [bestLongTableSize]prevEntry
34	dictTable     []prevEntry
35	dictLongTable []prevEntry
36}
37
38// Encode improves compression...
39func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
40	const (
41		// Input margin is the number of bytes we read (8)
42		// and the maximum we will read ahead (2)
43		inputMargin            = 8 + 4
44		minNonLiteralBlockSize = 16
45	)
46
47	// Protect against e.cur wraparound.
48	for e.cur >= bufferReset {
49		if len(e.hist) == 0 {
50			for i := range e.table[:] {
51				e.table[i] = prevEntry{}
52			}
53			for i := range e.longTable[:] {
54				e.longTable[i] = prevEntry{}
55			}
56			e.cur = e.maxMatchOff
57			break
58		}
59		// Shift down everything in the table that isn't already too far away.
60		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
61		for i := range e.table[:] {
62			v := e.table[i].offset
63			v2 := e.table[i].prev
64			if v < minOff {
65				v = 0
66				v2 = 0
67			} else {
68				v = v - e.cur + e.maxMatchOff
69				if v2 < minOff {
70					v2 = 0
71				} else {
72					v2 = v2 - e.cur + e.maxMatchOff
73				}
74			}
75			e.table[i] = prevEntry{
76				offset: v,
77				prev:   v2,
78			}
79		}
80		for i := range e.longTable[:] {
81			v := e.longTable[i].offset
82			v2 := e.longTable[i].prev
83			if v < minOff {
84				v = 0
85				v2 = 0
86			} else {
87				v = v - e.cur + e.maxMatchOff
88				if v2 < minOff {
89					v2 = 0
90				} else {
91					v2 = v2 - e.cur + e.maxMatchOff
92				}
93			}
94			e.longTable[i] = prevEntry{
95				offset: v,
96				prev:   v2,
97			}
98		}
99		e.cur = e.maxMatchOff
100		break
101	}
102
103	s := e.addBlock(src)
104	blk.size = len(src)
105	if len(src) < minNonLiteralBlockSize {
106		blk.extraLits = len(src)
107		blk.literals = blk.literals[:len(src)]
108		copy(blk.literals, src)
109		return
110	}
111
112	// Override src
113	src = e.hist
114	sLimit := int32(len(src)) - inputMargin
115	const kSearchStrength = 10
116
117	// nextEmit is where in src the next emitLiteral should start from.
118	nextEmit := s
119	cv := load6432(src, s)
120
121	// Relative offsets
122	offset1 := int32(blk.recentOffsets[0])
123	offset2 := int32(blk.recentOffsets[1])
124	offset3 := int32(blk.recentOffsets[2])
125
126	addLiterals := func(s *seq, until int32) {
127		if until == nextEmit {
128			return
129		}
130		blk.literals = append(blk.literals, src[nextEmit:until]...)
131		s.litLen = uint32(until - nextEmit)
132	}
133	_ = addLiterals
134
135	if debugEncoder {
136		println("recent offsets:", blk.recentOffsets)
137	}
138
139encodeLoop:
140	for {
141		// We allow the encoder to optionally turn off repeat offsets across blocks
142		canRepeat := len(blk.sequences) > 2
143
144		if debugAsserts && canRepeat && offset1 == 0 {
145			panic("offset0 was 0")
146		}
147
148		type match struct {
149			offset int32
150			s      int32
151			length int32
152			rep    int32
153		}
154		matchAt := func(offset int32, s int32, first uint32, rep int32) match {
155			if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
156				return match{offset: offset, s: s}
157			}
158			return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
159		}
160
161		bestOf := func(a, b match) match {
162			aScore := b.s - a.s + a.length
163			bScore := a.s - b.s + b.length
164			if a.rep < 0 {
165				aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8
166			}
167			if b.rep < 0 {
168				bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8
169			}
170			if aScore >= bScore {
171				return a
172			}
173			return b
174		}
175		const goodEnough = 100
176
177		nextHashL := hash8(cv, bestLongTableBits)
178		nextHashS := hash4x64(cv, bestShortTableBits)
179		candidateL := e.longTable[nextHashL]
180		candidateS := e.table[nextHashS]
181
182		best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
183		best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
184		best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
185		if canRepeat && best.length < goodEnough {
186			best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1))
187			best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2))
188			best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3))
189			if best.length > 0 {
190				best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1))
191				best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2))
192				best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3))
193			}
194		}
195		// Load next and check...
196		e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
197		e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
198
199		// Look far ahead, unless we have a really long match already...
200		if best.length < goodEnough {
201			// No match found, move forward on input, no need to check forward...
202			if best.length < 4 {
203				s += 1 + (s-nextEmit)>>(kSearchStrength-1)
204				if s >= sLimit {
205					break encodeLoop
206				}
207				cv = load6432(src, s)
208				continue
209			}
210
211			s++
212			candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)]
213			cv = load6432(src, s)
214			cv2 := load6432(src, s+1)
215			candidateL = e.longTable[hash8(cv, bestLongTableBits)]
216			candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)]
217
218			best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
219			best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
220			best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
221			best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
222			best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
223
224			// See if we can find a better match by checking where the current best ends.
225			// Use that offset to see if we can find a better full match.
226			if sAt := best.s + best.length; sAt < sLimit {
227				nextHashL := hash8(load6432(src, sAt), bestLongTableBits)
228				candidateEnd := e.longTable[nextHashL]
229				if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
230					bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
231					if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
232						bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
233					}
234					best = bestEnd
235				}
236			}
237		}
238
239		// We have a match, we can store the forward value
240		if best.rep > 0 {
241			s = best.s
242			var seq seq
243			seq.matchLen = uint32(best.length - zstdMinMatch)
244
245			// We might be able to match backwards.
246			// Extend as long as we can.
247			start := best.s
248			// We end the search early, so we don't risk 0 literals
249			// and have to do special offset treatment.
250			startLimit := nextEmit + 1
251
252			tMin := s - e.maxMatchOff
253			if tMin < 0 {
254				tMin = 0
255			}
256			repIndex := best.offset
257			for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
258				repIndex--
259				start--
260				seq.matchLen++
261			}
262			addLiterals(&seq, start)
263
264			// rep 0
265			seq.offset = uint32(best.rep)
266			if debugSequences {
267				println("repeat sequence", seq, "next s:", s)
268			}
269			blk.sequences = append(blk.sequences, seq)
270
271			// Index match start+1 (long) -> s - 1
272			index0 := s
273			s = best.s + best.length
274
275			nextEmit = s
276			if s >= sLimit {
277				if debugEncoder {
278					println("repeat ended", s, best.length)
279
280				}
281				break encodeLoop
282			}
283			// Index skipped...
284			off := index0 + e.cur
285			for index0 < s-1 {
286				cv0 := load6432(src, index0)
287				h0 := hash8(cv0, bestLongTableBits)
288				h1 := hash4x64(cv0, bestShortTableBits)
289				e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
290				e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
291				off++
292				index0++
293			}
294			switch best.rep {
295			case 2:
296				offset1, offset2 = offset2, offset1
297			case 3:
298				offset1, offset2, offset3 = offset3, offset1, offset2
299			}
300			cv = load6432(src, s)
301			continue
302		}
303
304		// A 4-byte match has been found. Update recent offsets.
305		// We'll later see if more than 4 bytes.
306		s = best.s
307		t := best.offset
308		offset1, offset2, offset3 = s-t, offset1, offset2
309
310		if debugAsserts && s <= t {
311			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
312		}
313
314		if debugAsserts && canRepeat && int(offset1) > len(src) {
315			panic("invalid offset")
316		}
317
318		// Extend the n-byte match as long as possible.
319		l := best.length
320
321		// Extend backwards
322		tMin := s - e.maxMatchOff
323		if tMin < 0 {
324			tMin = 0
325		}
326		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
327			s--
328			t--
329			l++
330		}
331
332		// Write our sequence
333		var seq seq
334		seq.litLen = uint32(s - nextEmit)
335		seq.matchLen = uint32(l - zstdMinMatch)
336		if seq.litLen > 0 {
337			blk.literals = append(blk.literals, src[nextEmit:s]...)
338		}
339		seq.offset = uint32(s-t) + 3
340		s += l
341		if debugSequences {
342			println("sequence", seq, "next s:", s)
343		}
344		blk.sequences = append(blk.sequences, seq)
345		nextEmit = s
346		if s >= sLimit {
347			break encodeLoop
348		}
349
350		// Index match start+1 (long) -> s - 1
351		index0 := s - l + 1
352		// every entry
353		for index0 < s-1 {
354			cv0 := load6432(src, index0)
355			h0 := hash8(cv0, bestLongTableBits)
356			h1 := hash4x64(cv0, bestShortTableBits)
357			off := index0 + e.cur
358			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
359			e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
360			index0++
361		}
362
363		cv = load6432(src, s)
364		if !canRepeat {
365			continue
366		}
367
368		// Check offset 2
369		for {
370			o2 := s - offset2
371			if load3232(src, o2) != uint32(cv) {
372				// Do regular search
373				break
374			}
375
376			// Store this, since we have it.
377			nextHashS := hash4x64(cv, bestShortTableBits)
378			nextHashL := hash8(cv, bestLongTableBits)
379
380			// We have at least 4 byte match.
381			// No need to check backwards. We come straight from a match
382			l := 4 + e.matchlen(s+4, o2+4, src)
383
384			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
385			e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset}
386			seq.matchLen = uint32(l) - zstdMinMatch
387			seq.litLen = 0
388
389			// Since litlen is always 0, this is offset 1.
390			seq.offset = 1
391			s += l
392			nextEmit = s
393			if debugSequences {
394				println("sequence", seq, "next s:", s)
395			}
396			blk.sequences = append(blk.sequences, seq)
397
398			// Swap offset 1 and 2.
399			offset1, offset2 = offset2, offset1
400			if s >= sLimit {
401				// Finished
402				break encodeLoop
403			}
404			cv = load6432(src, s)
405		}
406	}
407
408	if int(nextEmit) < len(src) {
409		blk.literals = append(blk.literals, src[nextEmit:]...)
410		blk.extraLits = len(src) - int(nextEmit)
411	}
412	blk.recentOffsets[0] = uint32(offset1)
413	blk.recentOffsets[1] = uint32(offset2)
414	blk.recentOffsets[2] = uint32(offset3)
415	if debugEncoder {
416		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
417	}
418}
419
420// EncodeNoHist will encode a block with no history and no following blocks.
421// Most notable difference is that src will not be copied for history and
422// we do not need to check for max match length.
423func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
424	e.ensureHist(len(src))
425	e.Encode(blk, src)
426}
427
428// ResetDict will reset and set a dictionary if not nil
429func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
430	e.resetBase(d, singleBlock)
431	if d == nil {
432		return
433	}
434	// Init or copy dict table
435	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
436		if len(e.dictTable) != len(e.table) {
437			e.dictTable = make([]prevEntry, len(e.table))
438		}
439		end := int32(len(d.content)) - 8 + e.maxMatchOff
440		for i := e.maxMatchOff; i < end; i += 4 {
441			const hashLog = bestShortTableBits
442
443			cv := load6432(d.content, i-e.maxMatchOff)
444			nextHash := hash4x64(cv, hashLog)      // 0 -> 4
445			nextHash1 := hash4x64(cv>>8, hashLog)  // 1 -> 5
446			nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6
447			nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7
448			e.dictTable[nextHash] = prevEntry{
449				prev:   e.dictTable[nextHash].offset,
450				offset: i,
451			}
452			e.dictTable[nextHash1] = prevEntry{
453				prev:   e.dictTable[nextHash1].offset,
454				offset: i + 1,
455			}
456			e.dictTable[nextHash2] = prevEntry{
457				prev:   e.dictTable[nextHash2].offset,
458				offset: i + 2,
459			}
460			e.dictTable[nextHash3] = prevEntry{
461				prev:   e.dictTable[nextHash3].offset,
462				offset: i + 3,
463			}
464		}
465		e.lastDictID = d.id
466	}
467
468	// Init or copy dict table
469	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
470		if len(e.dictLongTable) != len(e.longTable) {
471			e.dictLongTable = make([]prevEntry, len(e.longTable))
472		}
473		if len(d.content) >= 8 {
474			cv := load6432(d.content, 0)
475			h := hash8(cv, bestLongTableBits)
476			e.dictLongTable[h] = prevEntry{
477				offset: e.maxMatchOff,
478				prev:   e.dictLongTable[h].offset,
479			}
480
481			end := int32(len(d.content)) - 8 + e.maxMatchOff
482			off := 8 // First to read
483			for i := e.maxMatchOff + 1; i < end; i++ {
484				cv = cv>>8 | (uint64(d.content[off]) << 56)
485				h := hash8(cv, bestLongTableBits)
486				e.dictLongTable[h] = prevEntry{
487					offset: i,
488					prev:   e.dictLongTable[h].offset,
489				}
490				off++
491			}
492		}
493		e.lastDictID = d.id
494	}
495	// Reset table to initial state
496	copy(e.longTable[:], e.dictLongTable)
497
498	e.cur = e.maxMatchOff
499	// Reset table to initial state
500	copy(e.table[:], e.dictTable)
501}
502