1// Copyright 2015, Joe Tsai. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE.md file. 4 5package prefix 6 7import ( 8 "sort" 9 10 "github.com/dsnet/compress/internal" 11) 12 13// The algorithm used to decode variable length codes is based on the lookup 14// method in zlib. If the code is less-than-or-equal to maxChunkBits, 15// then the symbol can be decoded using a single lookup into the chunks table. 16// Otherwise, the links table will be used for a second level lookup. 17// 18// The chunks slice is keyed by the contents of the bit buffer ANDed with 19// the chunkMask to avoid a out-of-bounds lookup. The value of chunks is a tuple 20// that is decoded as follow: 21// 22// var length = chunks[bitBuffer&chunkMask] & countMask 23// var symbol = chunks[bitBuffer&chunkMask] >> countBits 24// 25// If the decoded length is larger than chunkBits, then an overflow link table 26// must be used for further decoding. In this case, the symbol is actually the 27// index into the links tables. The second-level links table returned is 28// processed in the same way as the chunks table. 29// 30// if length > chunkBits { 31// var index = symbol // Previous symbol is index into links tables 32// length = links[index][bitBuffer>>chunkBits & linkMask] & countMask 33// symbol = links[index][bitBuffer>>chunkBits & linkMask] >> countBits 34// } 35// 36// See the following: 37// http://www.gzip.org/algorithm.txt 38 39type Decoder struct { 40 chunks []uint32 // First-level lookup map 41 links [][]uint32 // Second-level lookup map 42 chunkMask uint32 // Mask the length of the chunks table 43 linkMask uint32 // Mask the length of the link table 44 chunkBits uint32 // Bit-length of the chunks table 45 46 MinBits uint32 // The minimum number of bits to safely make progress 47 NumSyms uint32 // Number of symbols 48} 49 50// Init initializes Decoder according to the codes provided. 51func (pd *Decoder) Init(codes PrefixCodes) { 52 // Handle special case trees. 53 if len(codes) <= 1 { 54 switch { 55 case len(codes) == 0: // Empty tree (should error if used later) 56 *pd = Decoder{chunks: pd.chunks[:0], links: pd.links[:0], NumSyms: 0} 57 case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero) 58 pd.chunks = append(pd.chunks[:0], codes[0].Sym<<countBits|0) 59 *pd = Decoder{chunks: pd.chunks[:1], links: pd.links[:0], NumSyms: 1} 60 default: 61 panic("invalid codes") 62 } 63 return 64 } 65 if internal.Debug && !sort.IsSorted(prefixCodesBySymbol(codes)) { 66 panic("input codes is not sorted") 67 } 68 if internal.Debug && !(codes.checkLengths() && codes.checkPrefixes()) { 69 panic("detected incomplete or overlapping codes") 70 } 71 72 var minBits, maxBits uint32 = valueBits, 0 73 for _, c := range codes { 74 if minBits > c.Len { 75 minBits = c.Len 76 } 77 if maxBits < c.Len { 78 maxBits = c.Len 79 } 80 } 81 82 // Allocate chunks table as needed. 83 const maxChunkBits = 9 // This can be tuned for better performance 84 pd.NumSyms = uint32(len(codes)) 85 pd.MinBits = minBits 86 pd.chunkBits = maxBits 87 if pd.chunkBits > maxChunkBits { 88 pd.chunkBits = maxChunkBits 89 } 90 numChunks := 1 << pd.chunkBits 91 pd.chunks = allocUint32s(pd.chunks, numChunks) 92 pd.chunkMask = uint32(numChunks - 1) 93 94 // Allocate links tables as needed. 95 pd.links = pd.links[:0] 96 pd.linkMask = 0 97 if pd.chunkBits < maxBits { 98 numLinks := 1 << (maxBits - pd.chunkBits) 99 pd.linkMask = uint32(numLinks - 1) 100 101 var linkIdx uint32 102 for i := range pd.chunks { 103 pd.chunks[i] = 0 // Logic below relies on zero value as uninitialized 104 } 105 for _, c := range codes { 106 if c.Len > pd.chunkBits && pd.chunks[c.Val&pd.chunkMask] == 0 { 107 pd.chunks[c.Val&pd.chunkMask] = (linkIdx << countBits) | (pd.chunkBits + 1) 108 linkIdx++ 109 } 110 } 111 112 pd.links = extendSliceUint32s(pd.links, int(linkIdx)) 113 linksFlat := allocUint32s(pd.links[0], numLinks*int(linkIdx)) 114 for i, j := 0, 0; i < len(pd.links); i, j = i+1, j+numLinks { 115 pd.links[i] = linksFlat[j : j+numLinks] 116 } 117 } 118 119 // Fill out chunks and links tables with values. 120 for _, c := range codes { 121 chunk := c.Sym<<countBits | c.Len 122 if c.Len <= pd.chunkBits { 123 skip := 1 << uint(c.Len) 124 for j := int(c.Val); j < len(pd.chunks); j += skip { 125 pd.chunks[j] = chunk 126 } 127 } else { 128 linkIdx := pd.chunks[c.Val&pd.chunkMask] >> countBits 129 links := pd.links[linkIdx] 130 skip := 1 << uint(c.Len-pd.chunkBits) 131 for j := int(c.Val >> pd.chunkBits); j < len(links); j += skip { 132 links[j] = chunk 133 } 134 } 135 } 136} 137