1// Copyright 2015, Joe Tsai. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE.md file. 4 5package brotli 6 7const ( 8 // RFC section 3.5. 9 // This is the maximum bit-width of a prefix code. 10 // Thus, it is okay to use uint32 to store codes. 11 maxPrefixBits = 15 12 13 // RFC section 3.3. 14 // The size of the alphabet for various prefix codes. 15 numLitSyms = 256 // Literal symbols 16 maxNumDistSyms = 16 + 120 + (48 << 3) // Distance symbols 17 numIaCSyms = 704 // Insert-and-copy length symbols 18 numBlkCntSyms = 26 // Block count symbols 19 maxNumBlkTypeSyms = 256 + 2 // Block type symbols 20 maxNumCtxMapSyms = 256 + 16 // Context map symbols 21 22 // This should be the max of each of the constants above. 23 maxNumAlphabetSyms = numIaCSyms 24) 25 26var ( 27 // RFC section 3.4. 28 // Prefix code lengths for simple codes. 29 simpleLens1 = [1]uint{0} 30 simpleLens2 = [2]uint{1, 1} 31 simpleLens3 = [3]uint{1, 2, 2} 32 simpleLens4a = [4]uint{2, 2, 2, 2} 33 simpleLens4b = [4]uint{1, 2, 3, 3} 34 35 // RFC section 3.5. 36 // Prefix code lengths for complex codes as they appear in the stream. 37 complexLens = [18]uint{ 38 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15, 39 } 40) 41 42type rangeCode struct { 43 base uint32 // Starting base offset of the range 44 bits uint32 // Bit-width of a subsequent integer to add to base offset 45} 46 47var ( 48 // RFC section 5. 49 // LUT to convert an insert symbol to an actual insert length. 50 insLenRanges []rangeCode 51 52 // RFC section 5. 53 // LUT to convert an copy symbol to an actual copy length. 54 cpyLenRanges []rangeCode 55 56 // RFC section 6. 57 // LUT to convert an block-type length symbol to an actual length. 58 blkLenRanges []rangeCode 59 60 // RFC section 7.3. 61 // LUT to convert RLE symbol to an actual repeat length. 62 maxRLERanges []rangeCode 63) 64 65type prefixCode struct { 66 sym uint32 // The symbol being mapped 67 val uint32 // Value of the prefix code (must be in [0..1<<len]) 68 len uint32 // Bit length of the prefix code 69} 70 71var ( 72 // RFC section 3.5. 73 // Prefix codecs for code lengths in complex prefix definition. 74 codeCLens []prefixCode 75 decCLens prefixDecoder 76 encCLens prefixEncoder 77 78 // RFC section 7.3. 79 // Prefix codecs for RLEMAX in context map definition. 80 codeMaxRLE []prefixCode 81 decMaxRLE prefixDecoder 82 encMaxRLE prefixEncoder 83 84 // RFC section 9.1. 85 // Prefix codecs for WBITS in stream header definition. 86 codeWinBits []prefixCode 87 decWinBits prefixDecoder 88 encWinBits prefixEncoder 89 90 // RFC section 9.2. 91 // Prefix codecs used for size fields in meta-block header definition. 92 codeCounts []prefixCode 93 decCounts prefixDecoder 94 encCounts prefixEncoder 95) 96 97var ( 98 // RFC section 5. 99 // Table to convert insert-and-copy symbols to insert and copy lengths. 100 iacLUT [numIaCSyms]struct{ ins, cpy rangeCode } 101 102 // RFC section 4. 103 // Table to help convert short-codes (first 16 symbols) to distances using 104 // the ring buffer of past distances. 105 distShortLUT [16]struct{ index, delta int } 106 107 // RFC section 4. 108 // Table to help convert long-codes to distances. This is two dimensional 109 // slice keyed by the NPOSTFIX and the normalized distance symbol. 110 distLongLUT [4][]rangeCode 111) 112 113func initPrefixLUTs() { 114 // Sanity check some constants. 115 for _, numMax := range []uint{ 116 numLitSyms, maxNumDistSyms, numIaCSyms, numBlkCntSyms, maxNumBlkTypeSyms, maxNumCtxMapSyms, 117 } { 118 if numMax > maxNumAlphabetSyms { 119 panic("maximum alphabet size is not updated") 120 } 121 } 122 if maxNumAlphabetSyms >= 1<<prefixSymbolBits { 123 panic("maximum alphabet size is too large to represent") 124 } 125 if maxPrefixBits >= 1<<prefixCountBits { 126 panic("maximum prefix bit-length is too large to represent") 127 } 128 129 initPrefixRangeLUTs() 130 initPrefixCodeLUTs() 131 initLengthLUTs() 132} 133 134func initPrefixRangeLUTs() { 135 makeRanges := func(base uint, bits []uint) (rc []rangeCode) { 136 for _, nb := range bits { 137 rc = append(rc, rangeCode{base: uint32(base), bits: uint32(nb)}) 138 base += 1 << nb 139 } 140 return rc 141 } 142 143 insLenRanges = makeRanges(0, []uint{ 144 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24, 145 }) // RFC section 5 146 cpyLenRanges = makeRanges(2, []uint{ 147 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24, 148 }) // RFC section 5 149 blkLenRanges = makeRanges(1, []uint{ 150 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24, 151 }) // RFC section 6 152 maxRLERanges = makeRanges(2, []uint{ 153 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 154 }) // RFC section 7.3 155} 156 157func initPrefixCodeLUTs() { 158 // Prefix code for reading code lengths in RFC section 3.5. 159 codeCLens = nil 160 for sym, clen := range []uint{2, 4, 3, 2, 2, 4} { 161 code := prefixCode{sym: uint32(sym), len: uint32(clen)} 162 codeCLens = append(codeCLens, code) 163 } 164 decCLens.Init(codeCLens, true) 165 encCLens.Init(codeCLens) 166 167 // Prefix code for reading RLEMAX in RFC section 7.3. 168 codeMaxRLE = []prefixCode{{sym: 0, val: 0, len: 1}} 169 for i := uint32(0); i < 16; i++ { 170 code := prefixCode{sym: i + 1, val: i<<1 | 1, len: 5} 171 codeMaxRLE = append(codeMaxRLE, code) 172 } 173 decMaxRLE.Init(codeMaxRLE, false) 174 encMaxRLE.Init(codeMaxRLE) 175 176 // Prefix code for reading WBITS in RFC section 9.1. 177 codeWinBits = nil 178 for i := uint32(9); i <= 24; i++ { 179 var code prefixCode 180 switch { 181 case i == 16: 182 code = prefixCode{sym: i, val: (i-16)<<0 | 0, len: 1} // Symbols: 16 183 case i > 17: 184 code = prefixCode{sym: i, val: (i-17)<<1 | 1, len: 4} // Symbols: 18..24 185 case i < 17: 186 code = prefixCode{sym: i, val: (i-8)<<4 | 1, len: 7} // Symbols: 9..15 187 default: 188 code = prefixCode{sym: i, val: (i-17)<<4 | 1, len: 7} // Symbols: 17 189 } 190 codeWinBits = append(codeWinBits, code) 191 } 192 codeWinBits[0].sym = 0 // Invalid code "1000100" to use symbol zero 193 decWinBits.Init(codeWinBits, false) 194 encWinBits.Init(codeWinBits) 195 196 // Prefix code for reading counts in RFC section 9.2. 197 // This is used for: NBLTYPESL, NBLTYPESI, NBLTYPESD, NTREESL, and NTREESD. 198 codeCounts = []prefixCode{{sym: 1, val: 0, len: 1}} 199 code := codeCounts[len(codeCounts)-1] 200 for i := uint32(0); i < 8; i++ { 201 for j := uint32(0); j < 1<<i; j++ { 202 code.sym = code.sym + 1 203 code.val = j<<4 | i<<1 | 1 204 code.len = i + 4 205 codeCounts = append(codeCounts, code) 206 } 207 } 208 decCounts.Init(codeCounts, false) 209 encCounts.Init(codeCounts) 210} 211 212func initLengthLUTs() { 213 // RFC section 5. 214 // The insert-and-copy length symbol is converted into an insert length 215 // and a copy length. Thus, create a table to precompute the result for 216 // all input symbols. 217 for iacSym := range iacLUT { 218 var insSym, cpySym int 219 switch iacSym / 64 { 220 case 0, 2: // 0..63 and 128..191 221 insSym, cpySym = 0, 0 222 case 1, 3: // 64..127 and 192..255 223 insSym, cpySym = 0, 8 224 case 4: // 256..319 225 insSym, cpySym = 8, 0 226 case 5: // 320..383 227 insSym, cpySym = 8, 8 228 case 6: // 384..447 229 insSym, cpySym = 0, 16 230 case 7: // 448..511 231 insSym, cpySym = 16, 0 232 case 8: // 512..575 233 insSym, cpySym = 8, 16 234 case 9: // 576..639 235 insSym, cpySym = 16, 8 236 case 10: // 640..703 237 insSym, cpySym = 16, 16 238 } 239 240 r64 := iacSym % 64 241 insSym += r64 >> 3 // Lower 3 bits 242 cpySym += r64 & 0x07 // Upper 3 bits 243 244 iacLUT[iacSym].ins = insLenRanges[insSym] 245 iacLUT[iacSym].cpy = cpyLenRanges[cpySym] 246 } 247 248 // RFC section 4. 249 // The first 16 symbols modify a previously seen symbol. Thus, we can create 250 // a table to determine which distance to use and how much to modify it by. 251 for distSym := range distShortLUT { 252 var index, delta int 253 switch { 254 case distSym < 4: 255 index, delta = distSym, 0 256 case distSym < 10: 257 index, delta = 0, distSym/2-1 258 case distSym < 16: 259 index, delta = 1, distSym/2-4 260 } 261 if distSym%2 == 0 { 262 delta *= -1 263 } 264 distShortLUT[distSym].index = index 265 distShortLUT[distSym].delta = delta 266 } 267 268 // RFC section 4. 269 // Longer distances are computed according the equation in the RFC. 270 // To reduce computation during runtime, we precompute as much of the output 271 // as possible. Thus, we compute the final distance using the following: 272 // rec := distLongLUT[NPOSTFIX][distSym - (16+NDIRECT)] 273 // distance := NDIRECT + rec.base + ReadBits(rec.bits)<<NPOSTFIX 274 for npostfix := range distLongLUT { 275 numDistSyms := 48 << uint(npostfix) 276 distLongLUT[npostfix] = make([]rangeCode, numDistSyms) 277 for distSym := range distLongLUT[npostfix] { 278 postfixMask := 1<<uint(npostfix) - 1 279 hcode := distSym >> uint(npostfix) 280 lcode := distSym & postfixMask 281 nbits := 1 + distSym>>uint(npostfix+1) 282 offset := ((2 + (hcode & 1)) << uint(nbits)) - 4 283 distLongLUT[npostfix][distSym] = rangeCode{ 284 base: uint32(offset<<uint(npostfix) + lcode + 1), 285 bits: uint32(nbits), 286 } 287 } 288 } 289} 290