1// Copyright 2018 Klaus Post. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
5
6// Package fse provides Finite State Entropy encoding and decoding.
7//
8// Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
9// for byte blocks as implemented in zstd.
10//
11// See https://github.com/klauspost/compress/tree/master/fse for more information.
12package fse
13
14import (
15	"errors"
16	"fmt"
17	"math/bits"
18)
19
20const (
21	/*!MEMORY_USAGE :
22	 *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
23	 *  Increasing memory usage improves compression ratio
24	 *  Reduced memory usage can improve speed, due to cache effect
25	 *  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
26	maxMemoryUsage     = 14
27	defaultMemoryUsage = 13
28
29	maxTableLog     = maxMemoryUsage - 2
30	maxTablesize    = 1 << maxTableLog
31	defaultTablelog = defaultMemoryUsage - 2
32	minTablelog     = 5
33	maxSymbolValue  = 255
34)
35
36var (
37	// ErrIncompressible is returned when input is judged to be too hard to compress.
38	ErrIncompressible = errors.New("input is not compressible")
39
40	// ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
41	ErrUseRLE = errors.New("input is single value repeated")
42)
43
44// Scratch provides temporary storage for compression and decompression.
45type Scratch struct {
46	// Private
47	count          [maxSymbolValue + 1]uint32
48	norm           [maxSymbolValue + 1]int16
49	symbolLen      uint16 // Length of active part of the symbol table.
50	actualTableLog uint8  // Selected tablelog.
51	br             byteReader
52	bits           bitReader
53	bw             bitWriter
54	ct             cTable      // Compression tables.
55	decTable       []decSymbol // Decompression table.
56	zeroBits       bool        // no bits has prob > 50%.
57	clearCount     bool        // clear count
58	maxCount       int         // count of the most probable symbol
59
60	// Per block parameters.
61	// These can be used to override compression parameters of the block.
62	// Do not touch, unless you know what you are doing.
63
64	// Out is output buffer.
65	// If the scratch is re-used before the caller is done processing the output,
66	// set this field to nil.
67	// Otherwise the output buffer will be re-used for next Compression/Decompression step
68	// and allocation will be avoided.
69	Out []byte
70
71	// MaxSymbolValue will override the maximum symbol value of the next block.
72	MaxSymbolValue uint8
73
74	// TableLog will attempt to override the tablelog for the next block.
75	TableLog uint8
76
77	// DecompressLimit limits the maximum decoded size acceptable.
78	// If > 0 decompression will stop when approximately this many bytes
79	// has been decoded.
80	// If 0, maximum size will be 2GB.
81	DecompressLimit int
82}
83
84// Histogram allows to populate the histogram and skip that step in the compression,
85// It otherwise allows to inspect the histogram when compression is done.
86// To indicate that you have populated the histogram call HistogramFinished
87// with the value of the highest populated symbol, as well as the number of entries
88// in the most populated entry. These are accepted at face value.
89// The returned slice will always be length 256.
90func (s *Scratch) Histogram() []uint32 {
91	return s.count[:]
92}
93
94// HistogramFinished can be called to indicate that the histogram has been populated.
95// maxSymbol is the index of the highest set symbol of the next data segment.
96// maxCount is the number of entries in the most populated entry.
97// These are accepted at face value.
98func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) {
99	s.maxCount = maxCount
100	s.symbolLen = uint16(maxSymbol) + 1
101	s.clearCount = maxCount != 0
102}
103
104// prepare will prepare and allocate scratch tables used for both compression and decompression.
105func (s *Scratch) prepare(in []byte) (*Scratch, error) {
106	if s == nil {
107		s = &Scratch{}
108	}
109	if s.MaxSymbolValue == 0 {
110		s.MaxSymbolValue = 255
111	}
112	if s.TableLog == 0 {
113		s.TableLog = defaultTablelog
114	}
115	if s.TableLog > maxTableLog {
116		return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog)
117	}
118	if cap(s.Out) == 0 {
119		s.Out = make([]byte, 0, len(in))
120	}
121	if s.clearCount && s.maxCount == 0 {
122		for i := range s.count {
123			s.count[i] = 0
124		}
125		s.clearCount = false
126	}
127	s.br.init(in)
128	if s.DecompressLimit == 0 {
129		// Max size 2GB.
130		s.DecompressLimit = (2 << 30) - 1
131	}
132
133	return s, nil
134}
135
136// tableStep returns the next table index.
137func tableStep(tableSize uint32) uint32 {
138	return (tableSize >> 1) + (tableSize >> 3) + 3
139}
140
141func highBits(val uint32) (n uint32) {
142	return uint32(bits.Len32(val) - 1)
143}
144