1// Copyright 2019+ Klaus Post. All rights reserved. 2// License information can be found in the LICENSE file. 3// Based on work by Yann Collet, released under BSD License. 4 5package zstd 6 7import ( 8 "errors" 9 "fmt" 10 "math" 11 "math/bits" 12 13 "github.com/klauspost/compress/huff0" 14) 15 16type blockEnc struct { 17 size int 18 literals []byte 19 sequences []seq 20 coders seqCoders 21 litEnc *huff0.Scratch 22 wr bitWriter 23 24 extraLits int 25 last bool 26 27 output []byte 28 recentOffsets [3]uint32 29 prevRecentOffsets [3]uint32 30} 31 32// init should be used once the block has been created. 33// If called more than once, the effect is the same as calling reset. 34func (b *blockEnc) init() { 35 if cap(b.literals) < maxCompressedLiteralSize { 36 b.literals = make([]byte, 0, maxCompressedLiteralSize) 37 } 38 const defSeqs = 200 39 b.literals = b.literals[:0] 40 if cap(b.sequences) < defSeqs { 41 b.sequences = make([]seq, 0, defSeqs) 42 } 43 if cap(b.output) < maxCompressedBlockSize { 44 b.output = make([]byte, 0, maxCompressedBlockSize) 45 } 46 if b.coders.mlEnc == nil { 47 b.coders.mlEnc = &fseEncoder{} 48 b.coders.mlPrev = &fseEncoder{} 49 b.coders.ofEnc = &fseEncoder{} 50 b.coders.ofPrev = &fseEncoder{} 51 b.coders.llEnc = &fseEncoder{} 52 b.coders.llPrev = &fseEncoder{} 53 } 54 b.litEnc = &huff0.Scratch{WantLogLess: 4} 55 b.reset(nil) 56} 57 58// initNewEncode can be used to reset offsets and encoders to the initial state. 59func (b *blockEnc) initNewEncode() { 60 b.recentOffsets = [3]uint32{1, 4, 8} 61 b.litEnc.Reuse = huff0.ReusePolicyNone 62 b.coders.setPrev(nil, nil, nil) 63} 64 65// reset will reset the block for a new encode, but in the same stream, 66// meaning that state will be carried over, but the block content is reset. 67// If a previous block is provided, the recent offsets are carried over. 68func (b *blockEnc) reset(prev *blockEnc) { 69 b.extraLits = 0 70 b.literals = b.literals[:0] 71 b.size = 0 72 b.sequences = b.sequences[:0] 73 b.output = b.output[:0] 74 b.last = false 75 if prev != nil { 76 b.recentOffsets = prev.prevRecentOffsets 77 } 78} 79 80// reset will reset the block for a new encode, but in the same stream, 81// meaning that state will be carried over, but the block content is reset. 82// If a previous block is provided, the recent offsets are carried over. 83func (b *blockEnc) swapEncoders(prev *blockEnc) { 84 b.coders.swap(&prev.coders) 85 b.litEnc, prev.litEnc = prev.litEnc, b.litEnc 86} 87 88// blockHeader contains the information for a block header. 89type blockHeader uint32 90 91// setLast sets the 'last' indicator on a block. 92func (h *blockHeader) setLast(b bool) { 93 if b { 94 *h = *h | 1 95 } else { 96 const mask = (1 << 24) - 2 97 *h = *h & mask 98 } 99} 100 101// setSize will store the compressed size of a block. 102func (h *blockHeader) setSize(v uint32) { 103 const mask = 7 104 *h = (*h)&mask | blockHeader(v<<3) 105} 106 107// setType sets the block type. 108func (h *blockHeader) setType(t blockType) { 109 const mask = 1 | (((1 << 24) - 1) ^ 7) 110 *h = (*h & mask) | blockHeader(t<<1) 111} 112 113// appendTo will append the block header to a slice. 114func (h blockHeader) appendTo(b []byte) []byte { 115 return append(b, uint8(h), uint8(h>>8), uint8(h>>16)) 116} 117 118// String returns a string representation of the block. 119func (h blockHeader) String() string { 120 return fmt.Sprintf("Type: %d, Size: %d, Last:%t", (h>>1)&3, h>>3, h&1 == 1) 121} 122 123// literalsHeader contains literals header information. 124type literalsHeader uint64 125 126// setType can be used to set the type of literal block. 127func (h *literalsHeader) setType(t literalsBlockType) { 128 const mask = math.MaxUint64 - 3 129 *h = (*h & mask) | literalsHeader(t) 130} 131 132// setSize can be used to set a single size, for uncompressed and RLE content. 133func (h *literalsHeader) setSize(regenLen int) { 134 inBits := bits.Len32(uint32(regenLen)) 135 // Only retain 2 bits 136 const mask = 3 137 lh := uint64(*h & mask) 138 switch { 139 case inBits < 5: 140 lh |= (uint64(regenLen) << 3) | (1 << 60) 141 if debug { 142 got := int(lh>>3) & 0xff 143 if got != regenLen { 144 panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)")) 145 } 146 } 147 case inBits < 12: 148 lh |= (1 << 2) | (uint64(regenLen) << 4) | (2 << 60) 149 case inBits < 20: 150 lh |= (3 << 2) | (uint64(regenLen) << 4) | (3 << 60) 151 default: 152 panic(fmt.Errorf("internal error: block too big (%d)", regenLen)) 153 } 154 *h = literalsHeader(lh) 155} 156 157// setSizes will set the size of a compressed literals section and the input length. 158func (h *literalsHeader) setSizes(compLen, inLen int, single bool) { 159 compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen)) 160 // Only retain 2 bits 161 const mask = 3 162 lh := uint64(*h & mask) 163 switch { 164 case compBits <= 10 && inBits <= 10: 165 if !single { 166 lh |= 1 << 2 167 } 168 lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60) 169 if debug { 170 const mmask = (1 << 24) - 1 171 n := (lh >> 4) & mmask 172 if int(n&1023) != inLen { 173 panic(fmt.Sprint("regensize:", int(n&1023), "!=", inLen, inBits)) 174 } 175 if int(n>>10) != compLen { 176 panic(fmt.Sprint("compsize:", int(n>>10), "!=", compLen, compBits)) 177 } 178 } 179 case compBits <= 14 && inBits <= 14: 180 lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60) 181 if single { 182 panic("single stream used with more than 10 bits length.") 183 } 184 case compBits <= 18 && inBits <= 18: 185 lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60) 186 if single { 187 panic("single stream used with more than 10 bits length.") 188 } 189 default: 190 panic("internal error: block too big") 191 } 192 *h = literalsHeader(lh) 193} 194 195// appendTo will append the literals header to a byte slice. 196func (h literalsHeader) appendTo(b []byte) []byte { 197 size := uint8(h >> 60) 198 switch size { 199 case 1: 200 b = append(b, uint8(h)) 201 case 2: 202 b = append(b, uint8(h), uint8(h>>8)) 203 case 3: 204 b = append(b, uint8(h), uint8(h>>8), uint8(h>>16)) 205 case 4: 206 b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24)) 207 case 5: 208 b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24), uint8(h>>32)) 209 default: 210 panic(fmt.Errorf("internal error: literalsHeader has invalid size (%d)", size)) 211 } 212 return b 213} 214 215// size returns the output size with currently set values. 216func (h literalsHeader) size() int { 217 return int(h >> 60) 218} 219 220func (h literalsHeader) String() string { 221 return fmt.Sprintf("Type: %d, SizeFormat: %d, Size: 0x%d, Bytes:%d", literalsBlockType(h&3), (h>>2)&3, h&((1<<60)-1)>>4, h>>60) 222} 223 224// pushOffsets will push the recent offsets to the backup store. 225func (b *blockEnc) pushOffsets() { 226 b.prevRecentOffsets = b.recentOffsets 227} 228 229// pushOffsets will push the recent offsets to the backup store. 230func (b *blockEnc) popOffsets() { 231 b.recentOffsets = b.prevRecentOffsets 232} 233 234// matchOffset will adjust recent offsets and return the adjusted one, 235// if it matches a previous offset. 236func (b *blockEnc) matchOffset(offset, lits uint32) uint32 { 237 // Check if offset is one of the recent offsets. 238 // Adjusts the output offset accordingly. 239 // Gives a tiny bit of compression, typically around 1%. 240 if true { 241 if lits > 0 { 242 switch offset { 243 case b.recentOffsets[0]: 244 offset = 1 245 case b.recentOffsets[1]: 246 b.recentOffsets[1] = b.recentOffsets[0] 247 b.recentOffsets[0] = offset 248 offset = 2 249 case b.recentOffsets[2]: 250 b.recentOffsets[2] = b.recentOffsets[1] 251 b.recentOffsets[1] = b.recentOffsets[0] 252 b.recentOffsets[0] = offset 253 offset = 3 254 default: 255 b.recentOffsets[2] = b.recentOffsets[1] 256 b.recentOffsets[1] = b.recentOffsets[0] 257 b.recentOffsets[0] = offset 258 offset += 3 259 } 260 } else { 261 switch offset { 262 case b.recentOffsets[1]: 263 b.recentOffsets[1] = b.recentOffsets[0] 264 b.recentOffsets[0] = offset 265 offset = 1 266 case b.recentOffsets[2]: 267 b.recentOffsets[2] = b.recentOffsets[1] 268 b.recentOffsets[1] = b.recentOffsets[0] 269 b.recentOffsets[0] = offset 270 offset = 2 271 case b.recentOffsets[0] - 1: 272 b.recentOffsets[2] = b.recentOffsets[1] 273 b.recentOffsets[1] = b.recentOffsets[0] 274 b.recentOffsets[0] = offset 275 offset = 3 276 default: 277 b.recentOffsets[2] = b.recentOffsets[1] 278 b.recentOffsets[1] = b.recentOffsets[0] 279 b.recentOffsets[0] = offset 280 offset += 3 281 } 282 } 283 } else { 284 offset += 3 285 } 286 return offset 287} 288 289// encodeRaw can be used to set the output to a raw representation of supplied bytes. 290func (b *blockEnc) encodeRaw(a []byte) { 291 var bh blockHeader 292 bh.setLast(b.last) 293 bh.setSize(uint32(len(a))) 294 bh.setType(blockTypeRaw) 295 b.output = bh.appendTo(b.output[:0]) 296 b.output = append(b.output, a...) 297 if debug { 298 println("Adding RAW block, length", len(a)) 299 } 300} 301 302// encodeRaw can be used to set the output to a raw representation of supplied bytes. 303func (b *blockEnc) encodeRawTo(dst, src []byte) []byte { 304 var bh blockHeader 305 bh.setLast(b.last) 306 bh.setSize(uint32(len(src))) 307 bh.setType(blockTypeRaw) 308 dst = bh.appendTo(dst) 309 dst = append(dst, src...) 310 if debug { 311 println("Adding RAW block, length", len(src)) 312 } 313 return dst 314} 315 316// encodeLits can be used if the block is only litLen. 317func (b *blockEnc) encodeLits(raw bool) error { 318 var bh blockHeader 319 bh.setLast(b.last) 320 bh.setSize(uint32(len(b.literals))) 321 322 // Don't compress extremely small blocks 323 if len(b.literals) < 32 || raw { 324 if debug { 325 println("Adding RAW block, length", len(b.literals)) 326 } 327 bh.setType(blockTypeRaw) 328 b.output = bh.appendTo(b.output) 329 b.output = append(b.output, b.literals...) 330 return nil 331 } 332 333 var ( 334 out []byte 335 reUsed, single bool 336 err error 337 ) 338 if len(b.literals) >= 1024 { 339 // Use 4 Streams. 340 out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) 341 if len(out) > len(b.literals)-len(b.literals)>>4 { 342 // Bail out of compression is too little. 343 err = huff0.ErrIncompressible 344 } 345 } else if len(b.literals) > 32 { 346 // Use 1 stream 347 single = true 348 out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc) 349 if len(out) > len(b.literals)-len(b.literals)>>4 { 350 // Bail out of compression is too little. 351 err = huff0.ErrIncompressible 352 } 353 } else { 354 err = huff0.ErrIncompressible 355 } 356 357 switch err { 358 case huff0.ErrIncompressible: 359 if debug { 360 println("Adding RAW block, length", len(b.literals)) 361 } 362 bh.setType(blockTypeRaw) 363 b.output = bh.appendTo(b.output) 364 b.output = append(b.output, b.literals...) 365 return nil 366 case huff0.ErrUseRLE: 367 if debug { 368 println("Adding RLE block, length", len(b.literals)) 369 } 370 bh.setType(blockTypeRLE) 371 b.output = bh.appendTo(b.output) 372 b.output = append(b.output, b.literals[0]) 373 return nil 374 default: 375 return err 376 case nil: 377 } 378 // Compressed... 379 // Now, allow reuse 380 b.litEnc.Reuse = huff0.ReusePolicyAllow 381 bh.setType(blockTypeCompressed) 382 var lh literalsHeader 383 if reUsed { 384 if debug { 385 println("Reused tree, compressed to", len(out)) 386 } 387 lh.setType(literalsBlockTreeless) 388 } else { 389 if debug { 390 println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable)) 391 } 392 lh.setType(literalsBlockCompressed) 393 } 394 // Set sizes 395 lh.setSizes(len(out), len(b.literals), single) 396 bh.setSize(uint32(len(out) + lh.size() + 1)) 397 398 // Write block headers. 399 b.output = bh.appendTo(b.output) 400 b.output = lh.appendTo(b.output) 401 // Add compressed data. 402 b.output = append(b.output, out...) 403 // No sequences. 404 b.output = append(b.output, 0) 405 return nil 406} 407 408// fuzzFseEncoder can be used to fuzz the FSE encoder. 409func fuzzFseEncoder(data []byte) int { 410 if len(data) > maxSequences || len(data) < 2 { 411 return 0 412 } 413 enc := fseEncoder{} 414 hist := enc.Histogram()[:256] 415 maxSym := uint8(0) 416 for i, v := range data { 417 v = v & 63 418 data[i] = v 419 hist[v]++ 420 if v > maxSym { 421 maxSym = v 422 } 423 } 424 if maxSym == 0 { 425 // All 0 426 return 0 427 } 428 maxCount := func(a []uint32) int { 429 var max uint32 430 for _, v := range a { 431 if v > max { 432 max = v 433 } 434 } 435 return int(max) 436 } 437 cnt := maxCount(hist[:maxSym]) 438 if cnt == len(data) { 439 // RLE 440 return 0 441 } 442 enc.HistogramFinished(maxSym, cnt) 443 err := enc.normalizeCount(len(data)) 444 if err != nil { 445 return 0 446 } 447 _, err = enc.writeCount(nil) 448 if err != nil { 449 panic(err) 450 } 451 return 1 452} 453 454// encode will encode the block and append the output in b.output. 455func (b *blockEnc) encode(raw bool) error { 456 if len(b.sequences) == 0 { 457 return b.encodeLits(raw) 458 } 459 // We want some difference 460 if len(b.literals) > (b.size - (b.size >> 5)) { 461 return errIncompressible 462 } 463 464 var bh blockHeader 465 var lh literalsHeader 466 bh.setLast(b.last) 467 bh.setType(blockTypeCompressed) 468 // Store offset of the block header. Needed when we know the size. 469 bhOffset := len(b.output) 470 b.output = bh.appendTo(b.output) 471 472 var ( 473 out []byte 474 reUsed, single bool 475 err error 476 ) 477 if len(b.literals) >= 1024 && !raw { 478 // Use 4 Streams. 479 out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) 480 } else if len(b.literals) > 32 && !raw { 481 // Use 1 stream 482 single = true 483 out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc) 484 } else { 485 err = huff0.ErrIncompressible 486 } 487 488 switch err { 489 case huff0.ErrIncompressible: 490 lh.setType(literalsBlockRaw) 491 lh.setSize(len(b.literals)) 492 b.output = lh.appendTo(b.output) 493 b.output = append(b.output, b.literals...) 494 if debug { 495 println("Adding literals RAW, length", len(b.literals)) 496 } 497 case huff0.ErrUseRLE: 498 lh.setType(literalsBlockRLE) 499 lh.setSize(len(b.literals)) 500 b.output = lh.appendTo(b.output) 501 b.output = append(b.output, b.literals[0]) 502 if debug { 503 println("Adding literals RLE") 504 } 505 default: 506 if debug { 507 println("Adding literals ERROR:", err) 508 } 509 return err 510 case nil: 511 // Compressed litLen... 512 if reUsed { 513 if debug { 514 println("reused tree") 515 } 516 lh.setType(literalsBlockTreeless) 517 } else { 518 if debug { 519 println("new tree, size:", len(b.litEnc.OutTable)) 520 } 521 lh.setType(literalsBlockCompressed) 522 if debug { 523 _, _, err := huff0.ReadTable(out, nil) 524 if err != nil { 525 panic(err) 526 } 527 } 528 } 529 lh.setSizes(len(out), len(b.literals), single) 530 if debug { 531 printf("Compressed %d literals to %d bytes", len(b.literals), len(out)) 532 println("Adding literal header:", lh) 533 } 534 b.output = lh.appendTo(b.output) 535 b.output = append(b.output, out...) 536 b.litEnc.Reuse = huff0.ReusePolicyAllow 537 if debug { 538 println("Adding literals compressed") 539 } 540 } 541 // Sequence compression 542 543 // Write the number of sequences 544 switch { 545 case len(b.sequences) < 128: 546 b.output = append(b.output, uint8(len(b.sequences))) 547 case len(b.sequences) < 0x7f00: // TODO: this could be wrong 548 n := len(b.sequences) 549 b.output = append(b.output, 128+uint8(n>>8), uint8(n)) 550 default: 551 n := len(b.sequences) - 0x7f00 552 b.output = append(b.output, 255, uint8(n), uint8(n>>8)) 553 } 554 if debug { 555 println("Encoding", len(b.sequences), "sequences") 556 } 557 b.genCodes() 558 llEnc := b.coders.llEnc 559 ofEnc := b.coders.ofEnc 560 mlEnc := b.coders.mlEnc 561 err = llEnc.normalizeCount(len(b.sequences)) 562 if err != nil { 563 return err 564 } 565 err = ofEnc.normalizeCount(len(b.sequences)) 566 if err != nil { 567 return err 568 } 569 err = mlEnc.normalizeCount(len(b.sequences)) 570 if err != nil { 571 return err 572 } 573 574 // Choose the best compression mode for each type. 575 // Will evaluate the new vs predefined and previous. 576 chooseComp := func(cur, prev, preDef *fseEncoder) (*fseEncoder, seqCompMode) { 577 // See if predefined/previous is better 578 hist := cur.count[:cur.symbolLen] 579 nSize := cur.approxSize(hist) + cur.maxHeaderSize() 580 predefSize := preDef.approxSize(hist) 581 prevSize := prev.approxSize(hist) 582 583 // Add a small penalty for new encoders. 584 // Don't bother with extremely small (<2 byte gains). 585 nSize = nSize + (nSize+2*8*16)>>4 586 switch { 587 case predefSize <= prevSize && predefSize <= nSize || forcePreDef: 588 if debug { 589 println("Using predefined", predefSize>>3, "<=", nSize>>3) 590 } 591 return preDef, compModePredefined 592 case prevSize <= nSize: 593 if debug { 594 println("Using previous", prevSize>>3, "<=", nSize>>3) 595 } 596 return prev, compModeRepeat 597 default: 598 if debug { 599 println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes") 600 println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen]) 601 } 602 return cur, compModeFSE 603 } 604 } 605 606 // Write compression mode 607 var mode uint8 608 if llEnc.useRLE { 609 mode |= uint8(compModeRLE) << 6 610 llEnc.setRLE(b.sequences[0].llCode) 611 if debug { 612 println("llEnc.useRLE") 613 } 614 } else { 615 var m seqCompMode 616 llEnc, m = chooseComp(llEnc, b.coders.llPrev, &fsePredefEnc[tableLiteralLengths]) 617 mode |= uint8(m) << 6 618 } 619 if ofEnc.useRLE { 620 mode |= uint8(compModeRLE) << 4 621 ofEnc.setRLE(b.sequences[0].ofCode) 622 if debug { 623 println("ofEnc.useRLE") 624 } 625 } else { 626 var m seqCompMode 627 ofEnc, m = chooseComp(ofEnc, b.coders.ofPrev, &fsePredefEnc[tableOffsets]) 628 mode |= uint8(m) << 4 629 } 630 631 if mlEnc.useRLE { 632 mode |= uint8(compModeRLE) << 2 633 mlEnc.setRLE(b.sequences[0].mlCode) 634 if debug { 635 println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen) 636 } 637 } else { 638 var m seqCompMode 639 mlEnc, m = chooseComp(mlEnc, b.coders.mlPrev, &fsePredefEnc[tableMatchLengths]) 640 mode |= uint8(m) << 2 641 } 642 b.output = append(b.output, mode) 643 if debug { 644 printf("Compression modes: 0b%b", mode) 645 } 646 b.output, err = llEnc.writeCount(b.output) 647 if err != nil { 648 return err 649 } 650 start := len(b.output) 651 b.output, err = ofEnc.writeCount(b.output) 652 if err != nil { 653 return err 654 } 655 if false { 656 println("block:", b.output[start:], "tablelog", ofEnc.actualTableLog, "maxcount:", ofEnc.maxCount) 657 fmt.Printf("selected TableLog: %d, Symbol length: %d\n", ofEnc.actualTableLog, ofEnc.symbolLen) 658 for i, v := range ofEnc.norm[:ofEnc.symbolLen] { 659 fmt.Printf("%3d: %5d -> %4d \n", i, ofEnc.count[i], v) 660 } 661 } 662 b.output, err = mlEnc.writeCount(b.output) 663 if err != nil { 664 return err 665 } 666 667 // Maybe in block? 668 wr := &b.wr 669 wr.reset(b.output) 670 671 var ll, of, ml cState 672 673 // Current sequence 674 seq := len(b.sequences) - 1 675 s := b.sequences[seq] 676 llEnc.setBits(llBitsTable[:]) 677 mlEnc.setBits(mlBitsTable[:]) 678 ofEnc.setBits(nil) 679 680 llTT, ofTT, mlTT := llEnc.ct.symbolTT[:256], ofEnc.ct.symbolTT[:256], mlEnc.ct.symbolTT[:256] 681 682 // We have 3 bounds checks here (and in the loop). 683 // Since we are iterating backwards it is kinda hard to avoid. 684 llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode] 685 ll.init(wr, &llEnc.ct, llB) 686 of.init(wr, &ofEnc.ct, ofB) 687 wr.flush32() 688 ml.init(wr, &mlEnc.ct, mlB) 689 690 // Each of these lookups also generates a bounds check. 691 wr.addBits32NC(s.litLen, llB.outBits) 692 wr.addBits32NC(s.matchLen, mlB.outBits) 693 wr.flush32() 694 wr.addBits32NC(s.offset, ofB.outBits) 695 if debugSequences { 696 println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB) 697 } 698 seq-- 699 if llEnc.maxBits+mlEnc.maxBits+ofEnc.maxBits <= 32 { 700 // No need to flush (common) 701 for seq >= 0 { 702 s = b.sequences[seq] 703 wr.flush32() 704 llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode] 705 // tabelog max is 8 for all. 706 of.encode(ofB) 707 ml.encode(mlB) 708 ll.encode(llB) 709 wr.flush32() 710 711 // We checked that all can stay within 32 bits 712 wr.addBits32NC(s.litLen, llB.outBits) 713 wr.addBits32NC(s.matchLen, mlB.outBits) 714 wr.addBits32NC(s.offset, ofB.outBits) 715 716 if debugSequences { 717 println("Encoded seq", seq, s) 718 } 719 720 seq-- 721 } 722 } else { 723 for seq >= 0 { 724 s = b.sequences[seq] 725 wr.flush32() 726 llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode] 727 // tabelog max is below 8 for each. 728 of.encode(ofB) 729 ml.encode(mlB) 730 ll.encode(llB) 731 wr.flush32() 732 733 // ml+ll = max 32 bits total 734 wr.addBits32NC(s.litLen, llB.outBits) 735 wr.addBits32NC(s.matchLen, mlB.outBits) 736 wr.flush32() 737 wr.addBits32NC(s.offset, ofB.outBits) 738 739 if debugSequences { 740 println("Encoded seq", seq, s) 741 } 742 743 seq-- 744 } 745 } 746 ml.flush(mlEnc.actualTableLog) 747 of.flush(ofEnc.actualTableLog) 748 ll.flush(llEnc.actualTableLog) 749 err = wr.close() 750 if err != nil { 751 return err 752 } 753 b.output = wr.out 754 755 if len(b.output)-3-bhOffset >= b.size { 756 // Maybe even add a bigger margin. 757 b.litEnc.Reuse = huff0.ReusePolicyNone 758 return errIncompressible 759 } 760 761 // Size is output minus block header. 762 bh.setSize(uint32(len(b.output)-bhOffset) - 3) 763 if debug { 764 println("Rewriting block header", bh) 765 } 766 _ = bh.appendTo(b.output[bhOffset:bhOffset]) 767 b.coders.setPrev(llEnc, mlEnc, ofEnc) 768 return nil 769} 770 771var errIncompressible = errors.New("incompressible") 772 773func (b *blockEnc) genCodes() { 774 if len(b.sequences) == 0 { 775 // nothing to do 776 return 777 } 778 779 if len(b.sequences) > math.MaxUint16 { 780 panic("can only encode up to 64K sequences") 781 } 782 // No bounds checks after here: 783 llH := b.coders.llEnc.Histogram()[:256] 784 ofH := b.coders.ofEnc.Histogram()[:256] 785 mlH := b.coders.mlEnc.Histogram()[:256] 786 for i := range llH { 787 llH[i] = 0 788 } 789 for i := range ofH { 790 ofH[i] = 0 791 } 792 for i := range mlH { 793 mlH[i] = 0 794 } 795 796 var llMax, ofMax, mlMax uint8 797 for i, seq := range b.sequences { 798 v := llCode(seq.litLen) 799 seq.llCode = v 800 llH[v]++ 801 if v > llMax { 802 llMax = v 803 } 804 805 v = ofCode(seq.offset) 806 seq.ofCode = v 807 ofH[v]++ 808 if v > ofMax { 809 ofMax = v 810 } 811 812 v = mlCode(seq.matchLen) 813 seq.mlCode = v 814 mlH[v]++ 815 if v > mlMax { 816 mlMax = v 817 if debug && mlMax > maxMatchLengthSymbol { 818 panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen)) 819 } 820 } 821 b.sequences[i] = seq 822 } 823 maxCount := func(a []uint32) int { 824 var max uint32 825 for _, v := range a { 826 if v > max { 827 max = v 828 } 829 } 830 return int(max) 831 } 832 if mlMax > maxMatchLengthSymbol { 833 panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax)) 834 } 835 if ofMax > maxOffsetBits { 836 panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax)) 837 } 838 if llMax > maxLiteralLengthSymbol { 839 panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax)) 840 } 841 842 b.coders.mlEnc.HistogramFinished(mlMax, maxCount(mlH[:mlMax+1])) 843 b.coders.ofEnc.HistogramFinished(ofMax, maxCount(ofH[:ofMax+1])) 844 b.coders.llEnc.HistogramFinished(llMax, maxCount(llH[:llMax+1])) 845} 846