1package zstd 2 3import ( 4 "errors" 5 "fmt" 6 "runtime" 7 "strings" 8) 9 10// EOption is an option for creating a encoder. 11type EOption func(*encoderOptions) error 12 13// options retains accumulated state of multiple options. 14type encoderOptions struct { 15 concurrent int 16 level EncoderLevel 17 single *bool 18 pad int 19 blockSize int 20 windowSize int 21 crc bool 22 fullZero bool 23 noEntropy bool 24 allLitEntropy bool 25 customWindow bool 26 customALEntropy bool 27 dict *dict 28} 29 30func (o *encoderOptions) setDefault() { 31 *o = encoderOptions{ 32 // use less ram: true for now, but may change. 33 concurrent: runtime.GOMAXPROCS(0), 34 crc: true, 35 single: nil, 36 blockSize: 1 << 16, 37 windowSize: 8 << 20, 38 level: SpeedDefault, 39 allLitEntropy: true, 40 } 41} 42 43// encoder returns an encoder with the selected options. 44func (o encoderOptions) encoder() encoder { 45 switch o.level { 46 case SpeedDefault: 47 return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}} 48 case SpeedBetterCompression: 49 return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} 50 case SpeedBestCompression: 51 return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} 52 case SpeedFastest: 53 return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} 54 } 55 panic("unknown compression level") 56} 57 58// WithEncoderCRC will add CRC value to output. 59// Output will be 4 bytes larger. 60func WithEncoderCRC(b bool) EOption { 61 return func(o *encoderOptions) error { o.crc = b; return nil } 62} 63 64// WithEncoderConcurrency will set the concurrency, 65// meaning the maximum number of decoders to run concurrently. 66// The value supplied must be at least 1. 67// By default this will be set to GOMAXPROCS. 68func WithEncoderConcurrency(n int) EOption { 69 return func(o *encoderOptions) error { 70 if n <= 0 { 71 return fmt.Errorf("concurrency must be at least 1") 72 } 73 o.concurrent = n 74 return nil 75 } 76} 77 78// WithWindowSize will set the maximum allowed back-reference distance. 79// The value must be a power of two between MinWindowSize and MaxWindowSize. 80// A larger value will enable better compression but allocate more memory and, 81// for above-default values, take considerably longer. 82// The default value is determined by the compression level. 83func WithWindowSize(n int) EOption { 84 return func(o *encoderOptions) error { 85 switch { 86 case n < MinWindowSize: 87 return fmt.Errorf("window size must be at least %d", MinWindowSize) 88 case n > MaxWindowSize: 89 return fmt.Errorf("window size must be at most %d", MaxWindowSize) 90 case (n & (n - 1)) != 0: 91 return errors.New("window size must be a power of 2") 92 } 93 94 o.windowSize = n 95 o.customWindow = true 96 if o.blockSize > o.windowSize { 97 o.blockSize = o.windowSize 98 } 99 return nil 100 } 101} 102 103// WithEncoderPadding will add padding to all output so the size will be a multiple of n. 104// This can be used to obfuscate the exact output size or make blocks of a certain size. 105// The contents will be a skippable frame, so it will be invisible by the decoder. 106// n must be > 0 and <= 1GB, 1<<30 bytes. 107// The padded area will be filled with data from crypto/rand.Reader. 108// If `EncodeAll` is used with data already in the destination, the total size will be multiple of this. 109func WithEncoderPadding(n int) EOption { 110 return func(o *encoderOptions) error { 111 if n <= 0 { 112 return fmt.Errorf("padding must be at least 1") 113 } 114 // No need to waste our time. 115 if n == 1 { 116 o.pad = 0 117 } 118 if n > 1<<30 { 119 return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ") 120 } 121 o.pad = n 122 return nil 123 } 124} 125 126// EncoderLevel predefines encoder compression levels. 127// Only use the constants made available, since the actual mapping 128// of these values are very likely to change and your compression could change 129// unpredictably when upgrading the library. 130type EncoderLevel int 131 132const ( 133 speedNotSet EncoderLevel = iota 134 135 // SpeedFastest will choose the fastest reasonable compression. 136 // This is roughly equivalent to the fastest Zstandard mode. 137 SpeedFastest 138 139 // SpeedDefault is the default "pretty fast" compression option. 140 // This is roughly equivalent to the default Zstandard mode (level 3). 141 SpeedDefault 142 143 // SpeedBetterCompression will yield better compression than the default. 144 // Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage. 145 // By using this, notice that CPU usage may go up in the future. 146 SpeedBetterCompression 147 148 // SpeedBestCompression will choose the best available compression option. 149 // This will offer the best compression no matter the CPU cost. 150 SpeedBestCompression 151 152 // speedLast should be kept as the last actual compression option. 153 // The is not for external usage, but is used to keep track of the valid options. 154 speedLast 155) 156 157// EncoderLevelFromString will convert a string representation of an encoding level back 158// to a compression level. The compare is not case sensitive. 159// If the string wasn't recognized, (false, SpeedDefault) will be returned. 160func EncoderLevelFromString(s string) (bool, EncoderLevel) { 161 for l := speedNotSet + 1; l < speedLast; l++ { 162 if strings.EqualFold(s, l.String()) { 163 return true, l 164 } 165 } 166 return false, SpeedDefault 167} 168 169// EncoderLevelFromZstd will return an encoder level that closest matches the compression 170// ratio of a specific zstd compression level. 171// Many input values will provide the same compression level. 172func EncoderLevelFromZstd(level int) EncoderLevel { 173 switch { 174 case level < 3: 175 return SpeedFastest 176 case level >= 3 && level < 6: 177 return SpeedDefault 178 case level >= 6 && level < 10: 179 return SpeedBetterCompression 180 case level >= 10: 181 return SpeedBetterCompression 182 } 183 return SpeedDefault 184} 185 186// String provides a string representation of the compression level. 187func (e EncoderLevel) String() string { 188 switch e { 189 case SpeedFastest: 190 return "fastest" 191 case SpeedDefault: 192 return "default" 193 case SpeedBetterCompression: 194 return "better" 195 case SpeedBestCompression: 196 return "best" 197 default: 198 return "invalid" 199 } 200} 201 202// WithEncoderLevel specifies a predefined compression level. 203func WithEncoderLevel(l EncoderLevel) EOption { 204 return func(o *encoderOptions) error { 205 switch { 206 case l <= speedNotSet || l >= speedLast: 207 return fmt.Errorf("unknown encoder level") 208 } 209 o.level = l 210 if !o.customWindow { 211 switch o.level { 212 case SpeedFastest: 213 o.windowSize = 4 << 20 214 case SpeedDefault: 215 o.windowSize = 8 << 20 216 case SpeedBetterCompression: 217 o.windowSize = 16 << 20 218 case SpeedBestCompression: 219 o.windowSize = 32 << 20 220 } 221 } 222 if !o.customALEntropy { 223 o.allLitEntropy = l > SpeedFastest 224 } 225 226 return nil 227 } 228} 229 230// WithZeroFrames will encode 0 length input as full frames. 231// This can be needed for compatibility with zstandard usage, 232// but is not needed for this package. 233func WithZeroFrames(b bool) EOption { 234 return func(o *encoderOptions) error { 235 o.fullZero = b 236 return nil 237 } 238} 239 240// WithAllLitEntropyCompression will apply entropy compression if no matches are found. 241// Disabling this will skip incompressible data faster, but in cases with no matches but 242// skewed character distribution compression is lost. 243// Default value depends on the compression level selected. 244func WithAllLitEntropyCompression(b bool) EOption { 245 return func(o *encoderOptions) error { 246 o.customALEntropy = true 247 o.allLitEntropy = b 248 return nil 249 } 250} 251 252// WithNoEntropyCompression will always skip entropy compression of literals. 253// This can be useful if content has matches, but unlikely to benefit from entropy 254// compression. Usually the slight speed improvement is not worth enabling this. 255func WithNoEntropyCompression(b bool) EOption { 256 return func(o *encoderOptions) error { 257 o.noEntropy = b 258 return nil 259 } 260} 261 262// WithSingleSegment will set the "single segment" flag when EncodeAll is used. 263// If this flag is set, data must be regenerated within a single continuous memory segment. 264// In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present. 265// As a consequence, the decoder must allocate a memory segment of size equal or larger than size of your content. 266// In order to preserve the decoder from unreasonable memory requirements, 267// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range. 268// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB. 269// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations. 270// If this is not specified, block encodes will automatically choose this based on the input size. 271// This setting has no effect on streamed encodes. 272func WithSingleSegment(b bool) EOption { 273 return func(o *encoderOptions) error { 274 o.single = &b 275 return nil 276 } 277} 278 279// WithEncoderDict allows to register a dictionary that will be used for the encode. 280// The encoder *may* choose to use no dictionary instead for certain payloads. 281func WithEncoderDict(dict []byte) EOption { 282 return func(o *encoderOptions) error { 283 d, err := loadDict(dict) 284 if err != nil { 285 return err 286 } 287 o.dict = d 288 return nil 289 } 290} 291