1// Copyright 2015, Joe Tsai. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE.md file. 4 5// Package bzip2 implements the BZip2 compressed data format. 6// 7// Canonical C implementation: 8// http://bzip.org 9// 10// Unofficial format specification: 11// https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf 12package bzip2 13 14import ( 15 "fmt" 16 "hash/crc32" 17 18 "github.com/dsnet/compress/internal" 19 "github.com/dsnet/compress/internal/errors" 20) 21 22// There does not exist a formal specification of the BZip2 format. As such, 23// much of this work is derived by either reverse engineering the original C 24// source code or using secondary sources. 25// 26// Significant amounts of fuzz testing is done to ensure that outputs from 27// this package is properly decoded by the C library. Furthermore, we test that 28// both this package and the C library agree about what inputs are invalid. 29// 30// Compression stack: 31// Run-length encoding 1 (RLE1) 32// Burrows-Wheeler transform (BWT) 33// Move-to-front transform (MTF) 34// Run-length encoding 2 (RLE2) 35// Prefix encoding (PE) 36// 37// References: 38// http://bzip.org/ 39// https://en.wikipedia.org/wiki/Bzip2 40// https://code.google.com/p/jbzip2/ 41 42const ( 43 BestSpeed = 1 44 BestCompression = 9 45 DefaultCompression = 6 46) 47 48const ( 49 hdrMagic = 0x425a // Hex of "BZ" 50 blkMagic = 0x314159265359 // BCD of PI 51 endMagic = 0x177245385090 // BCD of sqrt(PI) 52 53 blockSize = 100000 54) 55 56func errorf(c int, f string, a ...interface{}) error { 57 return errors.Error{Code: c, Pkg: "bzip2", Msg: fmt.Sprintf(f, a...)} 58} 59 60func panicf(c int, f string, a ...interface{}) { 61 errors.Panic(errorf(c, f, a...)) 62} 63 64// errWrap converts a lower-level errors.Error to be one from this package. 65// The replaceCode passed in will be used to replace the code for any errors 66// with the errors.Invalid code. 67// 68// For the Reader, set this to errors.Corrupted. 69// For the Writer, set this to errors.Internal. 70func errWrap(err error, replaceCode int) error { 71 if cerr, ok := err.(errors.Error); ok { 72 if errors.IsInvalid(cerr) { 73 cerr.Code = replaceCode 74 } 75 err = errorf(cerr.Code, "%s", cerr.Msg) 76 } 77 return err 78} 79 80var errClosed = errorf(errors.Closed, "") 81 82// crc computes the CRC-32 used by BZip2. 83// 84// The CRC-32 computation in bzip2 treats bytes as having bits in big-endian 85// order. That is, the MSB is read before the LSB. Thus, we can use the 86// standard library version of CRC-32 IEEE with some minor adjustments. 87// 88// The byte array is used as an intermediate buffer to swap the bits of every 89// byte of the input. 90type crc struct { 91 val uint32 92 buf [256]byte 93} 94 95// update computes the CRC-32 of appending buf to c. 96func (c *crc) update(buf []byte) { 97 cval := internal.ReverseUint32(c.val) 98 for len(buf) > 0 { 99 n := len(buf) 100 if n > len(c.buf) { 101 n = len(c.buf) 102 } 103 for i, b := range buf[:n] { 104 c.buf[i] = internal.ReverseLUT[b] 105 } 106 cval = crc32.Update(cval, crc32.IEEETable, c.buf[:n]) 107 buf = buf[n:] 108 } 109 c.val = internal.ReverseUint32(cval) 110} 111