1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package flate 6 7import ( 8 "bytes" 9 "encoding/binary" 10 "fmt" 11 "io" 12 "math" 13) 14 15const ( 16 // From top 17 // 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused 18 // 8 bits: xlength = length - MIN_MATCH_LENGTH 19 // 5 bits offsetcode 20 // 16 bits xoffset = offset - MIN_OFFSET_SIZE, or literal 21 lengthShift = 22 22 offsetMask = 1<<lengthShift - 1 23 typeMask = 3 << 30 24 literalType = 0 << 30 25 matchType = 1 << 30 26 matchOffsetOnlyMask = 0xffff 27) 28 29// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH) 30// is lengthCodes[length - MIN_MATCH_LENGTH] 31var lengthCodes = [256]uint8{ 32 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 33 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, 34 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 35 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 36 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 37 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 38 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 39 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 40 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 41 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 42 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 43 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 44 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 45 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 46 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 47 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 48 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 49 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 50 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 51 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 52 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 53 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 54 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 55 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 56 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 57 27, 27, 27, 27, 27, 28, 58} 59 60// lengthCodes1 is length codes, but starting at 1. 61var lengthCodes1 = [256]uint8{ 62 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 63 10, 10, 11, 11, 12, 12, 13, 13, 13, 13, 64 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 65 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 66 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 67 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 68 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 69 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 70 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 71 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 72 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 73 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 74 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 75 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 76 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 77 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 78 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 79 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 80 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 81 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 82 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 83 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 84 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 85 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 86 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 87 28, 28, 28, 28, 28, 29, 88} 89 90var offsetCodes = [256]uint32{ 91 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 92 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 93 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 94 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 95 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 96 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 97 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 98 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 99 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 100 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 101 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 102 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 103 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 104 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 105 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 106 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 107} 108 109// offsetCodes14 are offsetCodes, but with 14 added. 110var offsetCodes14 = [256]uint32{ 111 14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 112 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 113 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 114 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 115 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 116 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 117 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 118 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 119 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 120 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 121 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 122 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 123 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 124 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 125 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 126 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 127} 128 129type token uint32 130 131type tokens struct { 132 nLits int 133 extraHist [32]uint16 // codes 256->maxnumlit 134 offHist [32]uint16 // offset codes 135 litHist [256]uint16 // codes 0->255 136 n uint16 // Must be able to contain maxStoreBlockSize 137 tokens [maxStoreBlockSize + 1]token 138} 139 140func (t *tokens) Reset() { 141 if t.n == 0 { 142 return 143 } 144 t.n = 0 145 t.nLits = 0 146 for i := range t.litHist[:] { 147 t.litHist[i] = 0 148 } 149 for i := range t.extraHist[:] { 150 t.extraHist[i] = 0 151 } 152 for i := range t.offHist[:] { 153 t.offHist[i] = 0 154 } 155} 156 157func (t *tokens) Fill() { 158 if t.n == 0 { 159 return 160 } 161 for i, v := range t.litHist[:] { 162 if v == 0 { 163 t.litHist[i] = 1 164 t.nLits++ 165 } 166 } 167 for i, v := range t.extraHist[:literalCount-256] { 168 if v == 0 { 169 t.nLits++ 170 t.extraHist[i] = 1 171 } 172 } 173 for i, v := range t.offHist[:offsetCodeCount] { 174 if v == 0 { 175 t.offHist[i] = 1 176 } 177 } 178} 179 180func indexTokens(in []token) tokens { 181 var t tokens 182 t.indexTokens(in) 183 return t 184} 185 186func (t *tokens) indexTokens(in []token) { 187 t.Reset() 188 for _, tok := range in { 189 if tok < matchType { 190 t.AddLiteral(tok.literal()) 191 continue 192 } 193 t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask) 194 } 195} 196 197// emitLiteral writes a literal chunk and returns the number of bytes written. 198func emitLiteral(dst *tokens, lit []byte) { 199 ol := int(dst.n) 200 for i, v := range lit { 201 dst.tokens[(i+ol)&maxStoreBlockSize] = token(v) 202 dst.litHist[v]++ 203 } 204 dst.n += uint16(len(lit)) 205 dst.nLits += len(lit) 206} 207 208func (t *tokens) AddLiteral(lit byte) { 209 t.tokens[t.n] = token(lit) 210 t.litHist[lit]++ 211 t.n++ 212 t.nLits++ 213} 214 215// from https://stackoverflow.com/a/28730362 216func mFastLog2(val float32) float32 { 217 ux := int32(math.Float32bits(val)) 218 log2 := (float32)(((ux >> 23) & 255) - 128) 219 ux &= -0x7f800001 220 ux += 127 << 23 221 uval := math.Float32frombits(uint32(ux)) 222 log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759 223 return log2 224} 225 226// EstimatedBits will return an minimum size estimated by an *optimal* 227// compression of the block. 228// The size of the block 229func (t *tokens) EstimatedBits() int { 230 shannon := float32(0) 231 bits := int(0) 232 nMatches := 0 233 if t.nLits > 0 { 234 invTotal := 1.0 / float32(t.nLits) 235 for _, v := range t.litHist[:] { 236 if v > 0 { 237 n := float32(v) 238 shannon += atLeastOne(-mFastLog2(n*invTotal)) * n 239 } 240 } 241 // Just add 15 for EOB 242 shannon += 15 243 for i, v := range t.extraHist[1 : literalCount-256] { 244 if v > 0 { 245 n := float32(v) 246 shannon += atLeastOne(-mFastLog2(n*invTotal)) * n 247 bits += int(lengthExtraBits[i&31]) * int(v) 248 nMatches += int(v) 249 } 250 } 251 } 252 if nMatches > 0 { 253 invTotal := 1.0 / float32(nMatches) 254 for i, v := range t.offHist[:offsetCodeCount] { 255 if v > 0 { 256 n := float32(v) 257 shannon += atLeastOne(-mFastLog2(n*invTotal)) * n 258 bits += int(offsetExtraBits[i&31]) * int(v) 259 } 260 } 261 } 262 return int(shannon) + bits 263} 264 265// AddMatch adds a match to the tokens. 266// This function is very sensitive to inlining and right on the border. 267func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { 268 if debugDeflate { 269 if xlength >= maxMatchLength+baseMatchLength { 270 panic(fmt.Errorf("invalid length: %v", xlength)) 271 } 272 if xoffset >= maxMatchOffset+baseMatchOffset { 273 panic(fmt.Errorf("invalid offset: %v", xoffset)) 274 } 275 } 276 oCode := offsetCode(xoffset) 277 xoffset |= oCode << 16 278 t.nLits++ 279 280 t.extraHist[lengthCodes1[uint8(xlength)]]++ 281 t.offHist[oCode]++ 282 t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset) 283 t.n++ 284} 285 286// AddMatchLong adds a match to the tokens, potentially longer than max match length. 287// Length should NOT have the base subtracted, only offset should. 288func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) { 289 if debugDeflate { 290 if xoffset >= maxMatchOffset+baseMatchOffset { 291 panic(fmt.Errorf("invalid offset: %v", xoffset)) 292 } 293 } 294 oc := offsetCode(xoffset) 295 xoffset |= oc << 16 296 for xlength > 0 { 297 xl := xlength 298 if xl > 258 { 299 // We need to have at least baseMatchLength left over for next loop. 300 xl = 258 - baseMatchLength 301 } 302 xlength -= xl 303 xl -= baseMatchLength 304 t.nLits++ 305 t.extraHist[lengthCodes1[uint8(xl)]]++ 306 t.offHist[oc]++ 307 t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset) 308 t.n++ 309 } 310} 311 312func (t *tokens) AddEOB() { 313 t.tokens[t.n] = token(endBlockMarker) 314 t.extraHist[0]++ 315 t.n++ 316} 317 318func (t *tokens) Slice() []token { 319 return t.tokens[:t.n] 320} 321 322// VarInt returns the tokens as varint encoded bytes. 323func (t *tokens) VarInt() []byte { 324 var b = make([]byte, binary.MaxVarintLen32*int(t.n)) 325 var off int 326 for _, v := range t.tokens[:t.n] { 327 off += binary.PutUvarint(b[off:], uint64(v)) 328 } 329 return b[:off] 330} 331 332// FromVarInt restores t to the varint encoded tokens provided. 333// Any data in t is removed. 334func (t *tokens) FromVarInt(b []byte) error { 335 var buf = bytes.NewReader(b) 336 var toks []token 337 for { 338 r, err := binary.ReadUvarint(buf) 339 if err == io.EOF { 340 break 341 } 342 if err != nil { 343 return err 344 } 345 toks = append(toks, token(r)) 346 } 347 t.indexTokens(toks) 348 return nil 349} 350 351// Returns the type of a token 352func (t token) typ() uint32 { return uint32(t) & typeMask } 353 354// Returns the literal of a literal token 355func (t token) literal() uint8 { return uint8(t) } 356 357// Returns the extra offset of a match token 358func (t token) offset() uint32 { return uint32(t) & offsetMask } 359 360func (t token) length() uint8 { return uint8(t >> lengthShift) } 361 362// The code is never more than 8 bits, but is returned as uint32 for convenience. 363func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) } 364 365// Returns the offset code corresponding to a specific offset 366func offsetCode(off uint32) uint32 { 367 if false { 368 if off < uint32(len(offsetCodes)) { 369 return offsetCodes[off&255] 370 } else if off>>7 < uint32(len(offsetCodes)) { 371 return offsetCodes[(off>>7)&255] + 14 372 } else { 373 return offsetCodes[(off>>14)&255] + 28 374 } 375 } 376 if off < uint32(len(offsetCodes)) { 377 return offsetCodes[uint8(off)] 378 } 379 return offsetCodes14[uint8(off>>7)] 380} 381