package zstd

import (
	"errors"
	"fmt"
	"runtime"
	"strings"
)

// EOption is an option for creating an encoder.
type EOption func(*encoderOptions) error
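
// Options are passed to the Encoder constructors (e.g. NewWriter) and applied
// in order. A usage sketch, assuming input holds the data to compress:
//
//	enc, err := NewWriter(nil,
//		WithEncoderLevel(SpeedBetterCompression),
//		WithEncoderConcurrency(2),
//		WithEncoderCRC(true),
//	)
//	if err != nil {
//		// An option rejected its value.
//	}
//	compressed := enc.EncodeAll(input, nil)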

// encoderOptions retains the accumulated state of multiple options.
type encoderOptions struct {
	concurrent      int
	level           EncoderLevel
	single          *bool
	pad             int
	blockSize       int
	windowSize      int
	crc             bool
	fullZero        bool
	noEntropy       bool
	allLitEntropy   bool
	customWindow    bool
	customALEntropy bool
	dict            *dict
}

func (o *encoderOptions) setDefault() {
	*o = encoderOptions{
		concurrent:    runtime.GOMAXPROCS(0),
		crc:           true,
		single:        nil,
		blockSize:     1 << 16,
		windowSize:    8 << 20,
		level:         SpeedDefault,
		allLitEntropy: true,
	}
}

// encoder returns an encoder with the selected options.
func (o encoderOptions) encoder() encoder {
	switch o.level {
	case SpeedDefault:
		return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}}
	case SpeedBetterCompression:
		return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
	case SpeedBestCompression:
		return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
	case SpeedFastest:
		return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
	}
	panic("unknown compression level")
}

// WithEncoderCRC will add a CRC value to the output.
// The output will be 4 bytes larger.
func WithEncoderCRC(b bool) EOption {
	return func(o *encoderOptions) error { o.crc = b; return nil }
}

// WithEncoderConcurrency will set the concurrency,
// meaning the maximum number of encoders to run concurrently.
// The value supplied must be at least 1.
// By default this will be set to GOMAXPROCS.
func WithEncoderConcurrency(n int) EOption {
	return func(o *encoderOptions) error {
		if n <= 0 {
			return fmt.Errorf("concurrency must be at least 1")
		}
		o.concurrent = n
		return nil
	}
}

// WithWindowSize will set the maximum allowed back-reference distance.
// The value must be a power of two between MinWindowSize and MaxWindowSize.
// A larger value will enable better compression but allocate more memory and,
// for above-default values, take considerably longer.
// The default value is determined by the compression level.
func WithWindowSize(n int) EOption {
	return func(o *encoderOptions) error {
		switch {
		case n < MinWindowSize:
			return fmt.Errorf("window size must be at least %d", MinWindowSize)
		case n > MaxWindowSize:
			return fmt.Errorf("window size must be at most %d", MaxWindowSize)
		case (n & (n - 1)) != 0:
			return errors.New("window size must be a power of 2")
		}

		o.windowSize = n
		o.customWindow = true
		if o.blockSize > o.windowSize {
			o.blockSize = o.windowSize
		}
		return nil
	}
}
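
// A usage sketch: limiting the back-reference window to 1 MB; the block size
// is clamped so it never exceeds the chosen window:
//
//	enc, err := NewWriter(nil, WithWindowSize(1<<20))
//	if err != nil {
//		// n was out of range or not a power of two.
//	}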

// WithEncoderPadding will add padding to all output so the size will be a multiple of n.
// This can be used to obfuscate the exact output size or make blocks of a certain size.
// The contents will be a skippable frame, so it will be invisible to the decoder.
// n must be > 0 and <= 1GB, 1<<30 bytes.
// The padded area will be filled with data from crypto/rand.Reader.
// If `EncodeAll` is used with data already in the destination, the total size will be a multiple of this.
func WithEncoderPadding(n int) EOption {
	return func(o *encoderOptions) error {
		if n <= 0 {
			return fmt.Errorf("padding must be at least 1")
		}
		// Padding to a multiple of 1 is a no-op.
		if n == 1 {
			n = 0
		}
		if n > 1<<30 {
			return fmt.Errorf("padding must be no more than 1GB (1<<30 bytes)")
		}
		o.pad = n
		return nil
	}
}
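
// A usage sketch: with 4 KiB padding, the total output of EncodeAll is a
// multiple of 4096 bytes, the remainder being filled by a skippable frame:
//
//	enc, _ := NewWriter(nil, WithEncoderPadding(4096))
//	out := enc.EncodeAll(input, nil) // len(out)%4096 == 0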

// EncoderLevel predefines encoder compression levels.
// Only use the constants made available, since the actual mapping
// of these values is very likely to change and your compression could change
// unpredictably when upgrading the library.
type EncoderLevel int

const (
	speedNotSet EncoderLevel = iota

	// SpeedFastest will choose the fastest reasonable compression.
	// This is roughly equivalent to the fastest Zstandard mode.
	SpeedFastest

	// SpeedDefault is the default "pretty fast" compression option.
	// This is roughly equivalent to the default Zstandard mode (level 3).
	SpeedDefault

	// SpeedBetterCompression will yield better compression than the default.
	// Currently it is about zstd level 7-8 with ~2x-3x the default CPU usage.
	// Note that CPU usage may increase further in future versions.
	SpeedBetterCompression

	// SpeedBestCompression will choose the best available compression option.
	// This will offer the best compression no matter the CPU cost.
	SpeedBestCompression

	// speedLast should be kept as the sentinel after the last compression option.
	// It is not for external usage, but is used to keep track of the valid options.
	speedLast
)

// EncoderLevelFromString will convert a string representation of an encoding level back
// to a compression level. The comparison is not case sensitive.
// If the string isn't recognized, (false, SpeedDefault) will be returned.
func EncoderLevelFromString(s string) (bool, EncoderLevel) {
	for l := speedNotSet + 1; l < speedLast; l++ {
		if strings.EqualFold(s, l.String()) {
			return true, l
		}
	}
	return false, SpeedDefault
}
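
// A round-trip sketch: the accepted strings are exactly the values returned by
// EncoderLevel.String:
//
//	if ok, lvl := EncoderLevelFromString("Best"); ok {
//		// lvl == SpeedBestCompression; matching is case insensitive.
//	}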

// EncoderLevelFromZstd will return an encoder level that closest matches the compression
// ratio of a specific zstd compression level.
// Many input values will provide the same compression level.
func EncoderLevelFromZstd(level int) EncoderLevel {
	switch {
	case level < 3:
		return SpeedFastest
	case level >= 3 && level < 6:
		return SpeedDefault
	case level >= 6 && level < 10:
		return SpeedBetterCompression
	case level >= 10:
		return SpeedBestCompression
	}
	return SpeedDefault
}
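
// A usage sketch mapping a zstd CLI-style level onto this package's coarser levels:
//
//	lvl := EncoderLevelFromZstd(5) // SpeedDefault
//	enc, err := NewWriter(nil, WithEncoderLevel(lvl))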

// String provides a string representation of the compression level.
func (e EncoderLevel) String() string {
	switch e {
	case SpeedFastest:
		return "fastest"
	case SpeedDefault:
		return "default"
	case SpeedBetterCompression:
		return "better"
	case SpeedBestCompression:
		return "best"
	default:
		return "invalid"
	}
}

// WithEncoderLevel specifies a predefined compression level.
func WithEncoderLevel(l EncoderLevel) EOption {
	return func(o *encoderOptions) error {
		switch {
		case l <= speedNotSet || l >= speedLast:
			return fmt.Errorf("unknown encoder level")
		}
		o.level = l
		if !o.customWindow {
			switch o.level {
			case SpeedFastest:
				o.windowSize = 4 << 20
			case SpeedDefault:
				o.windowSize = 8 << 20
			case SpeedBetterCompression:
				o.windowSize = 16 << 20
			case SpeedBestCompression:
				o.windowSize = 32 << 20
			}
		}
		if !o.customALEntropy {
			o.allLitEntropy = l > SpeedFastest
		}

		return nil
	}
}
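
// A combination sketch: an explicitly chosen window size is kept, because the
// level only installs its default window when none was set:
//
//	// Keeps the 1 MB window instead of the 32 MB default of SpeedBestCompression.
//	enc, err := NewWriter(nil, WithWindowSize(1<<20), WithEncoderLevel(SpeedBestCompression))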

// WithZeroFrames will encode zero-length input as full frames.
// This can be needed for compatibility with zstandard usage,
// but is not needed for this package.
func WithZeroFrames(b bool) EOption {
	return func(o *encoderOptions) error {
		o.fullZero = b
		return nil
	}
}

// WithAllLitEntropyCompression will apply entropy compression if no matches are found.
// Disabling this will make incompressible data skip faster, but in cases where the
// data has no matches but a skewed character distribution, compression is lost.
// The default value depends on the compression level selected.
func WithAllLitEntropyCompression(b bool) EOption {
	return func(o *encoderOptions) error {
		o.customALEntropy = true
		o.allLitEntropy = b
		return nil
	}
}

// WithNoEntropyCompression will always skip entropy compression of literals.
// This can be useful if content has matches, but is unlikely to benefit from entropy
// compression. Usually the slight speed improvement is not worth enabling this.
func WithNoEntropyCompression(b bool) EOption {
	return func(o *encoderOptions) error {
		o.noEntropy = b
		return nil
	}
}

// WithSingleSegment will set the "single segment" flag when EncodeAll is used.
// If this flag is set, data must be regenerated within a single continuous memory segment.
// In this case, the Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present.
// As a consequence, the decoder must allocate a memory segment equal to or larger than the size of your content.
// To protect decoders from unreasonable memory requirements,
// a decoder is allowed to reject a compressed frame that requests a memory size beyond the decoder's authorized range.
// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
// This is only a recommendation; each decoder is free to support higher or lower limits, depending on local limitations.
// If this is not specified, block encodes will automatically choose this based on the input size.
// This setting has no effect on streamed encodes.
func WithSingleSegment(b bool) EOption {
	return func(o *encoderOptions) error {
		o.single = &b
		return nil
	}
}

// WithEncoderDict allows registering a dictionary that will be used for the encode.
// The encoder *may* choose to use no dictionary instead for certain payloads.
func WithEncoderDict(dict []byte) EOption {
	return func(o *encoderOptions) error {
		d, err := loadDict(dict)
		if err != nil {
			return err
		}
		o.dict = d
		return nil
	}
}
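
// A usage sketch, assuming dictBytes holds a zstd dictionary (for example one
// produced by `zstd --train`):
//
//	enc, err := NewWriter(nil, WithEncoderDict(dictBytes))
//	if err != nil {
//		// The dictionary could not be loaded.
//	}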