// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import (
	"fmt"
	"math"
	"math/bits"
)

const (
	tableBits      = 15                               // Bits used in the table
	tableSize      = 1 << tableBits                   // Size of the table
	tableShardCnt  = 1 << (tableBits - dictShardBits) // Number of shards in the table
	tableShardSize = tableSize / tableShardCnt        // Size of an individual shard
	tableMask      = tableSize - 1                    // Mask for table indices. Redundant, but can eliminate bounds checks.
	maxMatchLength = 131074                           // 128 KiB + 2
)

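// tableEntry is a single hash table entry.
// val caches the four source bytes at offset, so a candidate
// can be rejected without reading back into history.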
type tableEntry struct {
	val    uint32
	offset int32
}

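// fastEncoder is the fast, low-compression encoder.
// It uses a single hash table of candidate positions,
// following the strategy of zstd_fast.c.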
type fastEncoder struct {
	fastBase
	table [tableSize]tableEntry
}

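// fastEncoderDict adds dictionary support to fastEncoder.
// dictTable caches the table state generated from the dictionary,
// and the dirty flags track which table shards must be restored
// from it before the next use.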
type fastEncoderDict struct {
	fastEncoder
	dictTable       []tableEntry
	tableShardDirty [tableShardCnt]bool
	allDirty        bool
}

// Encode mimics functionality in zstd_fast.c
func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 7

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We do not use repeat offsets across blocks;
		// this is ensured by not using them for the first 3 matches.
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hash6(cv, hashLog)
			nextHash2 := hash6(cv>>8, hashLog)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				var length int32
				// length = 4 + e.matchlen(s+6, repIndex+4, src)
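				// Manually inlined match-length scan: XOR 8 bytes at a
				// time; the trailing zero count of the first nonzero
				// difference locates the mismatching byte.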
				{
					a := src[s+6:]
					b := src[repIndex+4:]
					endI := len(a) & (math.MaxInt32 - 7)
					length = int32(endI) + 4
					for i := 0; i < endI; i += 8 {
						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
							break
						}
					}
				}

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debug {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
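			// No match found: skip ahead, stepping faster the longer
			// we go without a match (controlled by kSearchStrength).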
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		//l := e.matchlen(s+4, t+4, src) + 4
		var l int32
		{
			a := src[s+4:]
			b := src[t+4:]
			endI := len(a) & (math.MaxInt32 - 7)
			l = int32(endI) + 4
			for i := 0; i < endI; i += 8 {
				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
					break
				}
			}
		}

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets; real offsets are stored with a +3 bias,
		// since values 1-3 encode the repeat codes.
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2: if the previous offset also matches here,
		// we can emit a cheap repeat sequence with zero literals.
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match.
			//l := 4 + e.matchlen(s+4, o2+4, src)
			var l int32
			{
				a := src[s+4:]
				b := src[o2+4:]
				endI := len(a) & (math.MaxInt32 - 7)
				l = int32(endI) + 4
				for i := 0; i < endI; i += 8 {
					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
						break
					}
				}
			}

			// Store this, since we have it.
			nextHash := hash6(cv, hashLog)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// The most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	if debug {
		if len(src) > maxBlockSize {
			panic("src too big")
		}
	}

	// Protect against e.cur wraparound.
	if e.cur >= bufferReset {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We do not use repeat offsets across blocks;
		// this is ensured by not using them for the first 3 matches.

		for {
			nextHash := hash6(cv, hashLog)
			nextHash2 := hash6(cv>>8, hashLog)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				// length := 4 + e.matchlen(s+6, repIndex+4, src)
				// length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
				var length int32
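				// Same manually inlined match-length scan as in Encode.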
				{
					a := src[s+6:]
					b := src[repIndex+4:]
					endI := len(a) & (math.MaxInt32 - 7)
					length = int32(endI) + 4
					for i := 0; i < endI; i += 8 {
						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
							break
						}
					}
				}

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debug {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
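			// No match found: skip ahead, stepping faster the longer
			// we go without a match.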
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && t < 0 {
			panic(fmt.Sprintf("t (%d) < 0", t))
		}
		// Extend the 4-byte match as long as possible.
		//l := e.matchlenNoHist(s+4, t+4, src) + 4
		// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
		var l int32
		{
			a := src[s+4:]
			b := src[t+4:]
			endI := len(a) & (math.MaxInt32 - 7)
			l = int32(endI) + 4
			for i := 0; i < endI; i += 8 {
				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
					break
				}
			}
		}

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match.
			//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
			// l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
			var l int32
			{
				a := src[s+4:]
				b := src[o2+4:]
				endI := len(a) & (math.MaxInt32 - 7)
				l = int32(endI) + 4
				for i := 0; i < endI; i += 8 {
					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
						break
					}
				}
			}

			// Store this, since we have it.
			nextHash := hash6(cv, hashLog)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
	// We do not store history, so we must offset e.cur to avoid false matches for the next user.
	if e.cur < bufferReset {
		e.cur += int32(len(src))
	}
}

// Encode will encode the content, with a dictionary if initialized for it.
func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
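	// Dirty-shard tracking only pays off for small inputs; for larger
	// ones the table is effectively rewritten anyway, so fall back to
	// the plain encoder and mark everything dirty.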
	if e.allDirty || len(src) > 32<<10 {
		e.fastEncoder.Encode(blk, src)
		e.allDirty = true
		return
	}
	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 7

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We do not use repeat offsets across blocks;
		// this is ensured by not using them for the first 3 matches.
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hash6(cv, hashLog)
			nextHash2 := hash6(cv>>8, hashLog)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
			e.markShardDirty(nextHash2)

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				var length int32
				// length = 4 + e.matchlen(s+6, repIndex+4, src)
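				// Same manually inlined match-length scan as in Encode.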
				{
					a := src[s+6:]
					b := src[repIndex+4:]
					endI := len(a) & (math.MaxInt32 - 7)
					length = int32(endI) + 4
					for i := 0; i < endI; i += 8 {
						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
							break
						}
					}
				}

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debug {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
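			// No match found: skip ahead, stepping faster the longer
			// we go without a match.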
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		//l := e.matchlen(s+4, t+4, src) + 4
		var l int32
		{
			a := src[s+4:]
			b := src[t+4:]
			endI := len(a) & (math.MaxInt32 - 7)
			l = int32(endI) + 4
			for i := 0; i < endI; i += 8 {
				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
					break
				}
			}
		}

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match.
			//l := 4 + e.matchlen(s+4, o2+4, src)
			var l int32
			{
				a := src[s+4:]
				b := src[o2+4:]
				endI := len(a) & (math.MaxInt32 - 7)
				l = int32(endI) + 4
				for i := 0; i < endI; i += 8 {
					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
						break
					}
				}
			}

			// Store this, since we have it.
			nextHash := hash6(cv, hashLog)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset the encoder. The fastEncoder does not support
// dictionaries; d must be nil.
func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("fastEncoder: Reset with dict")
	}
}

// Reset will reset the encoder and set a dictionary if not nil.
func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}

	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		end := e.maxMatchOff + int32(len(d.content)) - 8
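		// Index every dictionary position: each 8-byte load covers the
		// 6-byte windows starting at i, i+1 and i+2, hence the step of 3.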
		for i := e.maxMatchOff; i < end; i += 3 {
			const hashLog = tableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hash6(cv, hashLog)      // 0 -> 5
			nextHash1 := hash6(cv>>8, hashLog)  // 1 -> 6
			nextHash2 := hash6(cv>>16, hashLog) // 2 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	e.cur = e.maxMatchOff
	dirtyShardCnt := 0
	if !e.allDirty {
		for i := range e.tableShardDirty {
			if e.tableShardDirty[i] {
				dirtyShardCnt++
			}
		}
	}

	const shardCnt = tableShardCnt
	const shardSize = tableShardSize
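	// If most shards are dirty, one full-table copy is cheaper than
	// copying the dirty shards individually.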
	if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
		copy(e.table[:], e.dictTable)
		for i := range e.tableShardDirty {
			e.tableShardDirty[i] = false
		}
		e.allDirty = false
		return
	}
	for i := range e.tableShardDirty {
		if !e.tableShardDirty[i] {
			continue
		}

		copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
		e.tableShardDirty[i] = false
	}
	e.allDirty = false
}

func (e *fastEncoderDict) markAllShardsDirty() {
	e.allDirty = true
}

func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
	e.tableShardDirty[entryNum/tableShardSize] = true
}