1// Copyright 2015, Joe Tsai. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE.md file.
4
5package flate
6
7// The dictDecoder implements the LZ77 sliding dictionary that is commonly used
8// in various compression formats. For performance reasons, this implementation
9// performs little to no sanity checks about the arguments. As such, the
10// invariants documented for each method call must be respected. Furthermore,
11// to reduce the memory footprint decompressing short streams, the dictionary
12// starts with a relatively small size and then lazily grows.
13
14const (
15	initSize   = 4096 // Initial size allocated for sliding dictionary
16	growFactor = 4    // Rate the dictionary is grown to match expected size
17)
18
19type dictDecoder struct {
20	// Invariant: len(hist) <= size
21	size int    // Sliding window size
22	hist []byte // Sliding window history, dynamically grown to match size
23
24	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
25	wrPos int  // Current output position in buffer
26	rdPos int  // Have emitted hist[:rdPos] already
27	full  bool // Has a full window length been written yet?
28}
29
30func (dd *dictDecoder) Init(size int) {
31	*dd = dictDecoder{hist: dd.hist}
32
33	// Regardless of what size claims, start with a small dictionary to avoid
34	// denial-of-service attacks with large memory allocation.
35	dd.size = size
36	if dd.hist == nil {
37		dd.hist = make([]byte, initSize)
38	}
39	dd.hist = dd.hist[:cap(dd.hist)]
40	if len(dd.hist) > dd.size {
41		dd.hist = dd.hist[:dd.size]
42	}
43}
44
45// HistSize reports the total amount of historical data in the dictionary.
46func (dd *dictDecoder) HistSize() int {
47	if dd.full {
48		return dd.size
49	}
50	return dd.wrPos
51}
52
53// AvailSize reports the available amount of output buffer space.
54func (dd *dictDecoder) AvailSize() int {
55	return len(dd.hist) - dd.wrPos
56}
57
58// WriteSlice returns a slice of the available buffer to write data to.
59//
60// This invariant will be kept: len(s) <= AvailSize()
61func (dd *dictDecoder) WriteSlice() []byte {
62	return dd.hist[dd.wrPos:]
63}
64
65// WriteMark advances the write pointer by cnt.
66//
67// This invariant must be kept: 0 <= cnt <= AvailSize()
68func (dd *dictDecoder) WriteMark(cnt int) {
69	dd.wrPos += cnt
70}
71
72// WriteByte writes a single byte to the dictionary.
73//
74// This invariant must be kept: 0 < AvailSize()
75func (dd *dictDecoder) WriteByte(c byte) {
76	dd.hist[dd.wrPos] = c
77	dd.wrPos++
78}
79
80// TryWriteCopy tries to copy a string at a given (distance, length) to the
81// output. This specialized version is optimized for short distances.
82//
83// This method is designed to be inlined for performance reasons.
84//
85// This invariant must be kept: 0 < dist <= HistSize()
86func (dd *dictDecoder) TryWriteCopy(dist, length int) int {
87	wrPos := dd.wrPos
88	wrEnd := wrPos + length
89	if wrPos < dist || wrEnd > len(dd.hist) {
90		return 0
91	}
92
93	// Copy overlapping section before destination.
94	wrBase := wrPos
95	rdPos := wrPos - dist
96loop:
97	wrPos += copy(dd.hist[wrPos:wrEnd], dd.hist[rdPos:wrPos])
98	if wrPos < wrEnd {
99		goto loop // Avoid for-loop so that this function can be inlined
100	}
101	dd.wrPos = wrPos
102	return wrPos - wrBase
103}
104
105// WriteCopy copies a string at a given (distance, length) to the output.
106// This returns the number of bytes copied and may be less than the requested
107// length if the available space in the output buffer is too small.
108//
109// This invariant must be kept: 0 < dist <= HistSize()
110func (dd *dictDecoder) WriteCopy(dist, length int) int {
111	wrBase := dd.wrPos
112	wrPos := wrBase
113	rdPos := wrPos - dist
114	wrEnd := wrPos + length
115	if wrEnd > len(dd.hist) {
116		wrEnd = len(dd.hist)
117	}
118
119	// Copy non-overlapping section after destination.
120	if rdPos < 0 {
121		rdPos += len(dd.hist)
122		wrPos += copy(dd.hist[wrPos:wrEnd], dd.hist[rdPos:])
123		rdPos = 0
124	}
125
126	// Copy overlapping section before destination.
127	for wrPos < wrEnd {
128		wrPos += copy(dd.hist[wrPos:wrEnd], dd.hist[rdPos:wrPos])
129	}
130	dd.wrPos = wrPos
131	return wrPos - wrBase
132}
133
134// ReadFlush returns a slice of the historical buffer that is ready to be
135// emitted to the user. A call to ReadFlush is only valid after all of the data
136// from a previous call to ReadFlush has been consumed.
137func (dd *dictDecoder) ReadFlush() []byte {
138	toRead := dd.hist[dd.rdPos:dd.wrPos]
139	dd.rdPos = dd.wrPos
140	if dd.wrPos == len(dd.hist) {
141		if len(dd.hist) == dd.size {
142			dd.wrPos, dd.rdPos = 0, 0
143			dd.full = true
144		} else {
145			// Allocate a larger history buffer.
146			size := cap(dd.hist) * growFactor
147			if size > dd.size {
148				size = dd.size
149			}
150			hist := make([]byte, size)
151			copy(hist, dd.hist)
152			dd.hist = hist
153		}
154	}
155	return toRead
156}
157