// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	betterLongTableBits = 19                       // Bits used in the long match table
	betterLongTableSize = 1 << betterLongTableBits // Size of the table

	// Note: Increasing the short table bits or making the hash shorter
	// can actually lead to worse compression, since it will 'steal' more
	// slots from the long match table, and match offsets are quite big.
	// This greatly depends on the type of input.
	betterShortTableBits = 13                        // Bits used in the short match table
	betterShortTableSize = 1 << betterShortTableBits // Size of the table

	betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
	betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard

	betterShortTableShardCnt  = 1 << (betterShortTableBits - dictShardBits)     // Number of shards in the table
	betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
)
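
// Worked example of the sharding math, assuming dictShardBits (defined
// elsewhere in this package) is 6: the long table has 1<<19 = 524288 entries
// in 1<<(19-6) = 8192 shards of 64 entries each, and the short table has
// 1<<13 = 8192 entries in 1<<(13-6) = 128 shards of 64 entries each.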

type prevEntry struct {
	offset int32 // most recent position with this hash (absolute, includes e.cur)
	prev   int32 // previous position that had the same hash
}

// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find
// a long match at s+1 and use that if it is longer (lazy matching).
type betterFastEncoder struct {
	fastBase
	table     [betterShortTableSize]tableEntry
	longTable [betterLongTableSize]prevEntry
}
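
// The long-table update can be illustrated in isolation. This is a minimal
// sketch, not used by the encoder; chainInsert is a hypothetical helper
// showing how storing a new offset keeps the old head as prev, so lookups
// always see up to two candidate positions per hash bucket.
func chainInsert(table []prevEntry, h uint32, off int32) {
	// The previous head of the bucket becomes the second candidate.
	table[h] = prevEntry{offset: off, prev: table[h].offset}
}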

type betterFastEncoderDict struct {
	betterFastEncoder
	dictTable            []tableEntry
	dictLongTable        []prevEntry
	shortTableShardDirty [betterShortTableShardCnt]bool
	longTableShardDirty  [betterLongTableShardCnt]bool
	allDirty             bool
}
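
// A sketch of the dirty-shard scheme (inferred from the Reset logic below):
// Encode marks the fixed-size shard of each table it writes to, and Reset
// restores only those shards from dictTable/dictLongTable, unless most
// shards, or everything, are dirty, in which case the whole table is copied.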

// Encode improves compression over the fast encoder at some cost in speed.
func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read at once (8)
		// plus the maximum we will read ahead (2).
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.cur = e.maxMatchOff
		break
	}
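
	// For example, an entry at absolute offset v survives the shift above as
	// v-e.cur+e.maxMatchOff as long as v >= minOff, i.e. it is still within
	// e.maxMatchOff of the end of the history; older entries are zeroed so
	// they can never match again.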

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9
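
	// With kSearchStrength = 9, the skip step used in the search loop below
	// evaluates to 1 + ((s - nextEmit) >> 8): one extra byte of skip for
	// every 256 bytes scanned without finding a match.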

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	// addLiterals appends src[nextEmit:until] to the block literals
	// and sets the literal length on the given sequence.
	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks.
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					// Index skipped positions: a long hash at every other
					// byte and a short hash at the byte after it.
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}

					// Index skipped positions, as above.
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match on prev as well; keep the longer one.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t > e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// Found a regular match.
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1.
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, betterLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check the previous long candidate as well.
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// Try to find a better match by searching for a long match at the end of the current best match.
		if s+matched < sLimit {
			nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check the previous long candidate as well.
			coffsetL = candidateL.prev - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("prev long match at end-of-match")
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hash8(cv0, betterLongTableBits)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)

			// We have at least a 4 byte match.
			// No need to check backwards; we come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}
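
// A minimal usage sketch, assuming this package's public API: callers do not
// use betterFastEncoder directly, but reach it through the encoder level:
//
//	enc, err := NewWriter(nil, WithEncoderLevel(SpeedBetterCompression))
//	if err != nil {
//		panic(err)
//	}
//	compressed := enc.EncodeAll(input, nil)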

// EncodeNoHist will encode a block with no history and no following blocks.
// The most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	e.ensureHist(len(src))
	e.Encode(blk, src)
}

// Encode improves compression over the fast encoder at some cost in speed.
func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read at once (8)
		// plus the maximum we will read ahead (2).
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			e.allDirty = true
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.allDirty = true
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	// addLiterals appends src[nextEmit:until] to the block literals
	// and sets the literal length on the given sequence.
	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks.
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					// Index skipped positions: a long hash at every other
					// byte and a short hash at the byte after it.
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hash5(cv1, betterShortTableBits)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}

					// Index skipped positions, as above.
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hash5(cv1, betterShortTableBits)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match on prev as well; keep the longer one.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t > e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// Found a regular match.
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1.
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, betterLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				e.markLongShardDirty(nextHashL)
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check the previous long candidate as well.
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// Try to find a better match by searching for a long match at the end of the current best match.
		if s+matched < sLimit {
			nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check the previous long candidate as well.
			coffsetL = candidateL.prev - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("prev long match at end-of-match")
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hash8(cv0, betterLongTableBits)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.markLongShardDirty(h0)
			h1 := hash5(cv1, betterShortTableBits)
			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
			e.markShortShardDirty(h1)
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)

			// We have at least a 4 byte match.
			// No need to check backwards; we come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("betterFastEncoder: Reset with dict")
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}
	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		end := int32(len(d.content)) - 8 + e.maxMatchOff
		// Each 8-byte load covers four overlapping 5-byte hashes,
		// which is why i advances by 4.
		for i := e.maxMatchOff; i < end; i += 4 {
			const hashLog = betterShortTableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hash5(cv, hashLog)      // hash bytes 0-4
			nextHash1 := hash5(cv>>8, hashLog)  // hash bytes 1-5
			nextHash2 := hash5(cv>>16, hashLog) // hash bytes 2-6
			nextHash3 := hash5(cv>>24, hashLog) // hash bytes 3-7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
			e.dictTable[nextHash3] = tableEntry{
				val:    uint32(cv >> 24),
				offset: i + 3,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Init or copy dict long table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]prevEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			h := hash8(cv, betterLongTableBits)
			e.dictLongTable[h] = prevEntry{
				offset: e.maxMatchOff,
				prev:   e.dictLongTable[h].offset,
			}

			end := int32(len(d.content)) - 8 + e.maxMatchOff
			off := 8 // First byte to read; cv is kept as a rolling 8-byte window.
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[off]) << 56)
				h := hash8(cv, betterLongTableBits)
				e.dictLongTable[h] = prevEntry{
					offset: i,
					prev:   e.dictLongTable[h].offset,
				}
				off++
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Reset the short table to its dictionary state.
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.shortTableShardDirty {
				if e.shortTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterShortTableShardCnt
		const shardSize = betterShortTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.table[:], e.dictTable)
			for i := range e.shortTableShardDirty {
				e.shortTableShardDirty[i] = false
			}
		} else {
			for i := range e.shortTableShardDirty {
				if !e.shortTableShardDirty[i] {
					continue
				}

				copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
				e.shortTableShardDirty[i] = false
			}
		}
	}
	// Reset the long table the same way.
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.longTableShardDirty {
				if e.longTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterLongTableShardCnt
		const shardSize = betterLongTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.longTable[:], e.dictLongTable)
			for i := range e.longTableShardDirty {
				e.longTableShardDirty[i] = false
			}
		} else {
			for i := range e.longTableShardDirty {
				if !e.longTableShardDirty[i] {
					continue
				}

				copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
				e.longTableShardDirty[i] = false
			}
		}
	}
	e.cur = e.maxMatchOff
	e.allDirty = false
}

func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
}

func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
	e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
}
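
// Worked example of the shard mapping above: if betterShortTableShardSize is
// 64 (its value depends on dictShardBits), writing short-table entry 1000
// marks shard 1000/64 = 15 dirty, and the next Reset re-copies only entries
// 960-1023 from dictTable rather than the whole table.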