1// Copyright 2015, Joe Tsai. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE.md file.
4
5package brotli
6
7const (
8	// RFC section 3.5.
9	// This is the maximum bit-width of a prefix code.
10	// Thus, it is okay to use uint32 to store codes.
11	maxPrefixBits = 15
12
13	// RFC section 3.3.
14	// The size of the alphabet for various prefix codes.
15	numLitSyms        = 256                  // Literal symbols
16	maxNumDistSyms    = 16 + 120 + (48 << 3) // Distance symbols
17	numIaCSyms        = 704                  // Insert-and-copy length symbols
18	numBlkCntSyms     = 26                   // Block count symbols
19	maxNumBlkTypeSyms = 256 + 2              // Block type symbols
20	maxNumCtxMapSyms  = 256 + 16             // Context map symbols
21
22	// This should be the max of each of the constants above.
23	maxNumAlphabetSyms = numIaCSyms
24)
25
26var (
27	// RFC section 3.4.
28	// Prefix code lengths for simple codes.
29	simpleLens1  = [1]uint{0}
30	simpleLens2  = [2]uint{1, 1}
31	simpleLens3  = [3]uint{1, 2, 2}
32	simpleLens4a = [4]uint{2, 2, 2, 2}
33	simpleLens4b = [4]uint{1, 2, 3, 3}
34
35	// RFC section 3.5.
36	// Prefix code lengths for complex codes as they appear in the stream.
37	complexLens = [18]uint{
38		1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
39	}
40)
41
42type rangeCode struct {
43	base uint32 // Starting base offset of the range
44	bits uint32 // Bit-width of a subsequent integer to add to base offset
45}
46
47var (
48	// RFC section 5.
49	// LUT to convert an insert symbol to an actual insert length.
50	insLenRanges []rangeCode
51
52	// RFC section 5.
53	// LUT to convert an copy symbol to an actual copy length.
54	cpyLenRanges []rangeCode
55
56	// RFC section 6.
57	// LUT to convert an block-type length symbol to an actual length.
58	blkLenRanges []rangeCode
59
60	// RFC section 7.3.
61	// LUT to convert RLE symbol to an actual repeat length.
62	maxRLERanges []rangeCode
63)
64
65type prefixCode struct {
66	sym uint32 // The symbol being mapped
67	val uint32 // Value of the prefix code (must be in [0..1<<len])
68	len uint32 // Bit length of the prefix code
69}
70
71var (
72	// RFC section 3.5.
73	// Prefix codecs for code lengths in complex prefix definition.
74	codeCLens []prefixCode
75	decCLens  prefixDecoder
76	encCLens  prefixEncoder
77
78	// RFC section 7.3.
79	// Prefix codecs for RLEMAX in context map definition.
80	codeMaxRLE []prefixCode
81	decMaxRLE  prefixDecoder
82	encMaxRLE  prefixEncoder
83
84	// RFC section 9.1.
85	// Prefix codecs for WBITS in stream header definition.
86	codeWinBits []prefixCode
87	decWinBits  prefixDecoder
88	encWinBits  prefixEncoder
89
90	// RFC section 9.2.
91	// Prefix codecs used for size fields in meta-block header definition.
92	codeCounts []prefixCode
93	decCounts  prefixDecoder
94	encCounts  prefixEncoder
95)
96
97var (
98	// RFC section 5.
99	// Table to convert insert-and-copy symbols to insert and copy lengths.
100	iacLUT [numIaCSyms]struct{ ins, cpy rangeCode }
101
102	// RFC section 4.
103	// Table to help convert short-codes (first 16 symbols) to distances using
104	// the ring buffer of past distances.
105	distShortLUT [16]struct{ index, delta int }
106
107	// RFC section 4.
108	// Table to help convert long-codes to distances. This is two dimensional
109	// slice keyed by the NPOSTFIX and the normalized distance symbol.
110	distLongLUT [4][]rangeCode
111)
112
113func initPrefixLUTs() {
114	// Sanity check some constants.
115	for _, numMax := range []uint{
116		numLitSyms, maxNumDistSyms, numIaCSyms, numBlkCntSyms, maxNumBlkTypeSyms, maxNumCtxMapSyms,
117	} {
118		if numMax > maxNumAlphabetSyms {
119			panic("maximum alphabet size is not updated")
120		}
121	}
122	if maxNumAlphabetSyms >= 1<<prefixSymbolBits {
123		panic("maximum alphabet size is too large to represent")
124	}
125	if maxPrefixBits >= 1<<prefixCountBits {
126		panic("maximum prefix bit-length is too large to represent")
127	}
128
129	initPrefixRangeLUTs()
130	initPrefixCodeLUTs()
131	initLengthLUTs()
132}
133
134func initPrefixRangeLUTs() {
135	makeRanges := func(base uint, bits []uint) (rc []rangeCode) {
136		for _, nb := range bits {
137			rc = append(rc, rangeCode{base: uint32(base), bits: uint32(nb)})
138			base += 1 << nb
139		}
140		return rc
141	}
142
143	insLenRanges = makeRanges(0, []uint{
144		0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
145	}) // RFC section 5
146	cpyLenRanges = makeRanges(2, []uint{
147		0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
148	}) // RFC section 5
149	blkLenRanges = makeRanges(1, []uint{
150		2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24,
151	}) // RFC section 6
152	maxRLERanges = makeRanges(2, []uint{
153		1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
154	}) // RFC section 7.3
155}
156
157func initPrefixCodeLUTs() {
158	// Prefix code for reading code lengths in RFC section 3.5.
159	codeCLens = nil
160	for sym, clen := range []uint{2, 4, 3, 2, 2, 4} {
161		code := prefixCode{sym: uint32(sym), len: uint32(clen)}
162		codeCLens = append(codeCLens, code)
163	}
164	decCLens.Init(codeCLens, true)
165	encCLens.Init(codeCLens)
166
167	// Prefix code for reading RLEMAX in RFC section 7.3.
168	codeMaxRLE = []prefixCode{{sym: 0, val: 0, len: 1}}
169	for i := uint32(0); i < 16; i++ {
170		code := prefixCode{sym: i + 1, val: i<<1 | 1, len: 5}
171		codeMaxRLE = append(codeMaxRLE, code)
172	}
173	decMaxRLE.Init(codeMaxRLE, false)
174	encMaxRLE.Init(codeMaxRLE)
175
176	// Prefix code for reading WBITS in RFC section 9.1.
177	codeWinBits = nil
178	for i := uint32(9); i <= 24; i++ {
179		var code prefixCode
180		switch {
181		case i == 16:
182			code = prefixCode{sym: i, val: (i-16)<<0 | 0, len: 1} // Symbols: 16
183		case i > 17:
184			code = prefixCode{sym: i, val: (i-17)<<1 | 1, len: 4} // Symbols: 18..24
185		case i < 17:
186			code = prefixCode{sym: i, val: (i-8)<<4 | 1, len: 7} // Symbols: 9..15
187		default:
188			code = prefixCode{sym: i, val: (i-17)<<4 | 1, len: 7} // Symbols: 17
189		}
190		codeWinBits = append(codeWinBits, code)
191	}
192	codeWinBits[0].sym = 0 // Invalid code "1000100" to use symbol zero
193	decWinBits.Init(codeWinBits, false)
194	encWinBits.Init(codeWinBits)
195
196	// Prefix code for reading counts in RFC section 9.2.
197	// This is used for: NBLTYPESL, NBLTYPESI, NBLTYPESD, NTREESL, and NTREESD.
198	codeCounts = []prefixCode{{sym: 1, val: 0, len: 1}}
199	code := codeCounts[len(codeCounts)-1]
200	for i := uint32(0); i < 8; i++ {
201		for j := uint32(0); j < 1<<i; j++ {
202			code.sym = code.sym + 1
203			code.val = j<<4 | i<<1 | 1
204			code.len = i + 4
205			codeCounts = append(codeCounts, code)
206		}
207	}
208	decCounts.Init(codeCounts, false)
209	encCounts.Init(codeCounts)
210}
211
212func initLengthLUTs() {
213	// RFC section 5.
214	// The insert-and-copy length symbol is converted into an insert length
215	// and a copy length. Thus, create a table to precompute the result for
216	// all input symbols.
217	for iacSym := range iacLUT {
218		var insSym, cpySym int
219		switch iacSym / 64 {
220		case 0, 2: // 0..63 and 128..191
221			insSym, cpySym = 0, 0
222		case 1, 3: // 64..127 and 192..255
223			insSym, cpySym = 0, 8
224		case 4: // 256..319
225			insSym, cpySym = 8, 0
226		case 5: // 320..383
227			insSym, cpySym = 8, 8
228		case 6: // 384..447
229			insSym, cpySym = 0, 16
230		case 7: // 448..511
231			insSym, cpySym = 16, 0
232		case 8: // 512..575
233			insSym, cpySym = 8, 16
234		case 9: // 576..639
235			insSym, cpySym = 16, 8
236		case 10: // 640..703
237			insSym, cpySym = 16, 16
238		}
239
240		r64 := iacSym % 64
241		insSym += r64 >> 3   // Lower 3 bits
242		cpySym += r64 & 0x07 // Upper 3 bits
243
244		iacLUT[iacSym].ins = insLenRanges[insSym]
245		iacLUT[iacSym].cpy = cpyLenRanges[cpySym]
246	}
247
248	// RFC section 4.
249	// The first 16 symbols modify a previously seen symbol. Thus, we can create
250	// a table to determine which distance to use and how much to modify it by.
251	for distSym := range distShortLUT {
252		var index, delta int
253		switch {
254		case distSym < 4:
255			index, delta = distSym, 0
256		case distSym < 10:
257			index, delta = 0, distSym/2-1
258		case distSym < 16:
259			index, delta = 1, distSym/2-4
260		}
261		if distSym%2 == 0 {
262			delta *= -1
263		}
264		distShortLUT[distSym].index = index
265		distShortLUT[distSym].delta = delta
266	}
267
268	// RFC section 4.
269	// Longer distances are computed according the equation in the RFC.
270	// To reduce computation during runtime, we precompute as much of the output
271	// as possible. Thus, we compute the final distance using the following:
272	//	rec := distLongLUT[NPOSTFIX][distSym - (16+NDIRECT)]
273	//	distance := NDIRECT + rec.base + ReadBits(rec.bits)<<NPOSTFIX
274	for npostfix := range distLongLUT {
275		numDistSyms := 48 << uint(npostfix)
276		distLongLUT[npostfix] = make([]rangeCode, numDistSyms)
277		for distSym := range distLongLUT[npostfix] {
278			postfixMask := 1<<uint(npostfix) - 1
279			hcode := distSym >> uint(npostfix)
280			lcode := distSym & postfixMask
281			nbits := 1 + distSym>>uint(npostfix+1)
282			offset := ((2 + (hcode & 1)) << uint(nbits)) - 4
283			distLongLUT[npostfix][distSym] = rangeCode{
284				base: uint32(offset<<uint(npostfix) + lcode + 1),
285				bits: uint32(nbits),
286			}
287		}
288	}
289}
290