1// Copyright 2015, Joe Tsai. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE.md file.
4
5package prefix
6
7import (
8	"sort"
9
10	"github.com/dsnet/compress/internal"
11)
12
13// The algorithm used to decode variable length codes is based on the lookup
14// method in zlib. If the code is less-than-or-equal to maxChunkBits,
15// then the symbol can be decoded using a single lookup into the chunks table.
16// Otherwise, the links table will be used for a second level lookup.
17//
18// The chunks slice is keyed by the contents of the bit buffer ANDed with
19// the chunkMask to avoid a out-of-bounds lookup. The value of chunks is a tuple
20// that is decoded as follow:
21//
22//	var length = chunks[bitBuffer&chunkMask] & countMask
23//	var symbol = chunks[bitBuffer&chunkMask] >> countBits
24//
25// If the decoded length is larger than chunkBits, then an overflow link table
26// must be used for further decoding. In this case, the symbol is actually the
27// index into the links tables. The second-level links table returned is
28// processed in the same way as the chunks table.
29//
30//	if length > chunkBits {
31//		var index = symbol // Previous symbol is index into links tables
32//		length = links[index][bitBuffer>>chunkBits & linkMask] & countMask
33//		symbol = links[index][bitBuffer>>chunkBits & linkMask] >> countBits
34//	}
35//
36// See the following:
37//	http://www.gzip.org/algorithm.txt
38
39type Decoder struct {
40	chunks    []uint32   // First-level lookup map
41	links     [][]uint32 // Second-level lookup map
42	chunkMask uint32     // Mask the length of the chunks table
43	linkMask  uint32     // Mask the length of the link table
44	chunkBits uint32     // Bit-length of the chunks table
45
46	MinBits uint32 // The minimum number of bits to safely make progress
47	NumSyms uint32 // Number of symbols
48}
49
50// Init initializes Decoder according to the codes provided.
51func (pd *Decoder) Init(codes PrefixCodes) {
52	// Handle special case trees.
53	if len(codes) <= 1 {
54		switch {
55		case len(codes) == 0: // Empty tree (should error if used later)
56			*pd = Decoder{chunks: pd.chunks[:0], links: pd.links[:0], NumSyms: 0}
57		case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero)
58			pd.chunks = append(pd.chunks[:0], codes[0].Sym<<countBits|0)
59			*pd = Decoder{chunks: pd.chunks[:1], links: pd.links[:0], NumSyms: 1}
60		default:
61			panic("invalid codes")
62		}
63		return
64	}
65	if internal.Debug && !sort.IsSorted(prefixCodesBySymbol(codes)) {
66		panic("input codes is not sorted")
67	}
68	if internal.Debug && !(codes.checkLengths() && codes.checkPrefixes()) {
69		panic("detected incomplete or overlapping codes")
70	}
71
72	var minBits, maxBits uint32 = valueBits, 0
73	for _, c := range codes {
74		if minBits > c.Len {
75			minBits = c.Len
76		}
77		if maxBits < c.Len {
78			maxBits = c.Len
79		}
80	}
81
82	// Allocate chunks table as needed.
83	const maxChunkBits = 9 // This can be tuned for better performance
84	pd.NumSyms = uint32(len(codes))
85	pd.MinBits = minBits
86	pd.chunkBits = maxBits
87	if pd.chunkBits > maxChunkBits {
88		pd.chunkBits = maxChunkBits
89	}
90	numChunks := 1 << pd.chunkBits
91	pd.chunks = allocUint32s(pd.chunks, numChunks)
92	pd.chunkMask = uint32(numChunks - 1)
93
94	// Allocate links tables as needed.
95	pd.links = pd.links[:0]
96	pd.linkMask = 0
97	if pd.chunkBits < maxBits {
98		numLinks := 1 << (maxBits - pd.chunkBits)
99		pd.linkMask = uint32(numLinks - 1)
100
101		var linkIdx uint32
102		for i := range pd.chunks {
103			pd.chunks[i] = 0 // Logic below relies on zero value as uninitialized
104		}
105		for _, c := range codes {
106			if c.Len > pd.chunkBits && pd.chunks[c.Val&pd.chunkMask] == 0 {
107				pd.chunks[c.Val&pd.chunkMask] = (linkIdx << countBits) | (pd.chunkBits + 1)
108				linkIdx++
109			}
110		}
111
112		pd.links = extendSliceUint32s(pd.links, int(linkIdx))
113		linksFlat := allocUint32s(pd.links[0], numLinks*int(linkIdx))
114		for i, j := 0, 0; i < len(pd.links); i, j = i+1, j+numLinks {
115			pd.links[i] = linksFlat[j : j+numLinks]
116		}
117	}
118
119	// Fill out chunks and links tables with values.
120	for _, c := range codes {
121		chunk := c.Sym<<countBits | c.Len
122		if c.Len <= pd.chunkBits {
123			skip := 1 << uint(c.Len)
124			for j := int(c.Val); j < len(pd.chunks); j += skip {
125				pd.chunks[j] = chunk
126			}
127		} else {
128			linkIdx := pd.chunks[c.Val&pd.chunkMask] >> countBits
129			links := pd.links[linkIdx]
130			skip := 1 << uint(c.Len-pd.chunkBits)
131			for j := int(c.Val >> pd.chunkBits); j < len(links); j += skip {
132				links[j] = chunk
133			}
134		}
135	}
136}
137